From bb7644a549427e5b3c23e6850c18fdc83ec3e440 Mon Sep 17 00:00:00 2001 From: arigatoexpress <95630102+arigatoexpress@users.noreply.github.com> Date: Mon, 15 Jun 2026 20:52:15 -0600 Subject: [PATCH 01/10] feat(notion): config-driven Command Center reader (PII-safe, Mira-free) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Integration increment 2. Generalizes tools/notion_client.py from the two hard-coded Mira DB ids into a reader over the whole Notion ops hub, so the in-app Ops Copilot can answer from the staff's real system (the functional Mira replacement). Purely additive — existing fetch_installations/fetch_feedback untouched. - _db_id(db_key): resolves a Command Center DB id from config.yaml notion.databases.<key>, env fallback NOTION_<KEY>_DB_ID. DB ids are business config, not secrets; NOTION_TOKEN stays the only credential. - fetch_status_counts(db_key): count-by-status reader, PII-FREE BY CONSTRUCTION -- reads ONLY the status/phase column (alias-matched), never any other property, so no customer identity / dollar figure / free text can enter the result, however the operator names or adds columns. - is_command_center_enabled(): NOTION_COMMAND_CENTER master flag (default off); the consumer gates on it -> one-env-var flip + instant revert (INVENTORY_SOURCE pattern). - _find_prop now strips whitespace, tolerating dirty Notion schemas (trailing-space / duplicate column names). Tests: +5 (count correctness, PII never in result, dirty/aliased status columns, env resolver, unconfigured -> {}). 15 green, ruff clean. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- tests/test_notion_client.py | 67 ++++++++++++++++++++++++ tools/notion_client.py | 100 ++++++++++++++++++++++++++++++++++-- 2 files changed, 164 insertions(+), 3 deletions(-) diff --git a/tests/test_notion_client.py b/tests/test_notion_client.py index 7fa0368..03c1d5a 100644 --- a/tests/test_notion_client.py +++ b/tests/test_notion_client.py @@ -242,3 +242,70 @@ def test_fetch_skips_non_dict_results(notion_env, monkeypatch): rows = nc.fetch_feedback() assert len(rows) == 1 assert rows[0]["id"] == "page-fb-1" + + +# --------------------------------------------------------------------------- # +# Command Center reader (config-driven, GCP-native — increment 2) +# --------------------------------------------------------------------------- # +def _ops_page(status, *, prop_name="Status", with_pii=True): + props = {prop_name: _status(status)} + if with_pii: + # PII columns that must NEVER reach a count-by-status result: + props["Customer Name"] = _title("Jane Buyer") + props["Phone Number"] = {"type": "phone_number", "phone_number": "555-0000"} + props["Escrow Balance"] = _number(12345.67) + return {"id": f"pg-{status}", "created_time": "2026-06-14T10:00:00.000Z", "properties": props} + + +def test_command_center_disabled_by_default(monkeypatch): + monkeypatch.delenv("NOTION_COMMAND_CENTER", raising=False) + assert nc.is_command_center_enabled() is False + for val in ("on", "true", "1", "YES"): + monkeypatch.setenv("NOTION_COMMAND_CENTER", val) + assert nc.is_command_center_enabled() is True + monkeypatch.setenv("NOTION_COMMAND_CENTER", "off") + assert nc.is_command_center_enabled() is False + + +def test_db_id_resolves_env_for_command_center_keys(monkeypatch): + monkeypatch.setenv("NOTION_TITLE_DB_ID", "db-title") + monkeypatch.setenv("NOTION_COLLECTIONS_DB_ID", "db-coll") + assert nc._db_id("title") == "db-title" + assert nc._db_id("collections") == "db-coll" + assert nc._db_id("unknown_key") == "" + + +def 
test_fetch_status_counts_counts_by_status_and_is_pii_free(monkeypatch): + monkeypatch.setenv("NOTION_TOKEN", "secret_test") + monkeypatch.setenv("NOTION_TITLE_DB_ID", "db-title") + _patch_post(monkeypatch, payload={"results": [ + _ops_page("Title Issued"), + _ops_page("Title Issued"), + _ops_page("MCO Received from Factory"), + ]}) + counts = nc.fetch_status_counts("title") + assert counts == {"Title Issued": 2, "MCO Received from Factory": 1} + # The PII columns on every page must not have leaked into the result. + blob = repr(counts).lower() + assert "jane" not in blob and "555-0000" not in blob and "12345" not in blob + + +def test_fetch_status_counts_tolerates_dirty_and_aliased_status_columns(monkeypatch): + monkeypatch.setenv("NOTION_TOKEN", "secret_test") + monkeypatch.setenv("NOTION_DELIVERY_TRACKER_DB_ID", "db-delivery") + _patch_post(monkeypatch, payload={"results": [ + _ops_page("Delivered to Site", prop_name="Current Phase"), # alias, not "Status" + _ops_page("Delivered to Site", prop_name="Status "), # trailing-space column + _ops_page("Unknown one", prop_name="No Status Column At All"), # -> UNKNOWN + ]}) + counts = nc.fetch_status_counts("delivery_tracker") + assert counts == {"Delivered to Site": 2, "UNKNOWN": 1} + + +def test_fetch_status_counts_empty_when_unconfigured(monkeypatch): + monkeypatch.delenv("NOTION_TOKEN", raising=False) + monkeypatch.setenv("NOTION_TITLE_DB_ID", "db-title") + assert nc.fetch_status_counts("title") == {} + monkeypatch.setenv("NOTION_TOKEN", "secret_test") + monkeypatch.delenv("NOTION_TITLE_DB_ID", raising=False) + assert nc.fetch_status_counts("title") == {} diff --git a/tools/notion_client.py b/tools/notion_client.py index f777b13..6b4faf5 100644 --- a/tools/notion_client.py +++ b/tools/notion_client.py @@ -64,6 +64,55 @@ def _cs_survey_db_id() -> str: return (os.environ.get("NOTION_CS_SURVEY_DB_ID") or "").strip() +# --- Command Center reader (config-driven, GCP-native, Mira-free) ------------- +# Logical DB keys -> 
env-var fallback. DB ids are business config, NOT secrets, +# so they may live in config.yaml `notion.databases.`; only NOTION_TOKEN is +# a credential. This is what lets the Ops Copilot read the staff's whole ops hub +# (replacing the Mira bridge) instead of just the two hard-coded DBs. +_DB_KEY_ENV = { + "delivery_tracker": "NOTION_DELIVERY_TRACKER_DB_ID", + "cs_survey": "NOTION_CS_SURVEY_DB_ID", + "service_warranty": "NOTION_SERVICE_WARRANTY_DB_ID", + "title": "NOTION_TITLE_DB_ID", + "collections": "NOTION_COLLECTIONS_DB_ID", + "insurance": "NOTION_INSURANCE_DB_ID", + "team_tasks": "NOTION_TEAM_TASKS_DB_ID", + "lead_pipeline": "NOTION_LEAD_PIPELINE_DB_ID", +} + + +def _config_db_id(db_key: str) -> str: + """Notion DB id from config.yaml `notion.databases.`, or '' (never raises).""" + try: + from config_loader import get_config + + dbs = ((get_config() or {}).get("notion") or {}).get("databases") or {} + return str(dbs.get(db_key) or "").strip() + except Exception: # noqa: BLE001 — config block is optional; degrade to env + return "" + + +def _db_id(db_key: str) -> str: + """Resolve a Command Center DB id: config.yaml first, then env fallback.""" + cfg = _config_db_id(db_key) + if cfg: + return cfg + env_name = _DB_KEY_ENV.get(db_key) + return (os.environ.get(env_name) or "").strip() if env_name else "" + + +def is_command_center_enabled() -> bool: + """Master flag for the Notion Command Center reader (default OFF). + + The reader ships dark; the Ops Copilot (the consumer) checks this before + sourcing from Notion, so the whole feature flips on/off with one env var and + reverts with no redeploy — same pattern as INVENTORY_SOURCE. 
+ """ + return (os.getenv("NOTION_COMMAND_CENTER", "off") or "off").strip().lower() in { + "on", "true", "1", "yes", + } + + def is_installations_configured() -> bool: """True when both a token and the Delivery Tracker DB id are set.""" return bool(_token() and _delivery_tracker_db_id()) @@ -129,10 +178,13 @@ def _find_prop(props: dict[str, Any], *names: str) -> Any: """ if not isinstance(props, dict): return None - lowered = {k.lower(): v for k, v in props.items()} + # Strip + lowercase so dirty Notion column names (trailing spaces, casing, + # duplicates) still match. On a duplicate-after-normalization, the last wins. + lowered = {k.strip().lower(): v for k, v in props.items() if isinstance(k, str)} for name in names: - if name.lower() in lowered: - value = _prop_value(lowered[name.lower()]) + key = name.strip().lower() + if key in lowered: + value = _prop_value(lowered[key]) if value is not None: return value return None @@ -269,3 +321,45 @@ def fetch_feedback(limit: int = 50) -> list[dict[str, Any]]: item["deal_id"] = deal_id rows.append(item) return rows + + +# Status/phase column names across the Command Center DBs (Title='Status', +# Delivery='Current Phase', Collections/Insurance='Payment Status', +# CS-survey='Outreach Status', Lead Pipeline='Pipeline Stage', ...). _find_prop +# is case-insensitive and tolerates trailing-space/duplicate column names. +_STATUS_ALIASES = ( + "status", + "Current Phase", + "Phase", + "Pipeline Stage", + "Payment Status", + "Outreach Status", + "Outreach Stage", +) + + +def fetch_status_counts( + db_key: str, *, limit: int = 100, status_aliases: tuple[str, ...] = _STATUS_ALIASES +) -> dict[str, int]: + """Count rows by their status/phase in a Command Center DB. PII-free by construction. + + This is the generic, GCP-native reader the Ops Copilot uses to answer + "where is my delivery / title / service / collections" from the staff's real + system. 
It reads ONLY the status-like column (via case-insensitive alias + matching) and never any other property — so no customer identity, dollar + amount, or free-text comment can enter the result, regardless of how the + operator names or adds columns. Returns ``{status_value: count}``; ``{}`` when + unconfigured (no token / no db id) or on any error (the underlying query + never raises). + """ + db_id = _db_id(db_key) + if not _token() or not db_id: + return {} + counts: dict[str, int] = {} + for page in _query_database(db_id, limit): + if not isinstance(page, dict): + continue + status = _find_prop(page.get("properties", {}), *status_aliases) or "UNKNOWN" + key = str(status) + counts[key] = counts.get(key, 0) + 1 + return counts From 710e6d68699156486276c75987f256e624a8e3c9 Mon Sep 17 00:00:00 2001 From: arigatoexpress <95630102+arigatoexpress@users.noreply.github.com> Date: Mon, 15 Jun 2026 20:55:54 -0600 Subject: [PATCH 02/10] feat(copilot): source the Ops Copilot snapshot from the Notion Command Center Integration increment 3 -- the functional Mira replacement. Teaches get_business_snapshot() to source installations (Delivery Tracker), feedback (CS-survey), and count-by-status for the post-funding ops (service & warranty, title, collections, insurance) from Notion via increment 2's reader when NOTION_COMMAND_CENTER=on; otherwise the exact Firestore behavior (regression locked by the existing aggregation test). One env var flips it; revert instant. - PII-free: every Notion-sourced section is count-by-status only. - Fault-isolated: a failing Notion read degrades to {} / Firestore fallback and never sinks the snapshot; _call_model stays the only external seam. - +2 HELP_TOPICS (where-is-my-delivery, title/collections/insurance/service). Tests: +4 (Notion-sourced snapshot incl. ops sections + PII-free assertion, flag-off regression lock, reader-raises degradation, delivery help topic). 21 green, ruff clean. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- tests/test_ops_copilot.py | 79 ++++++++++++++++++++++++++++++++++++ tools/ops_copilot.py | 85 +++++++++++++++++++++++++++++++++++---- 2 files changed, 156 insertions(+), 8 deletions(-) diff --git a/tests/test_ops_copilot.py b/tests/test_ops_copilot.py index 6bcc073..4c4a2c9 100644 --- a/tests/test_ops_copilot.py +++ b/tests/test_ops_copilot.py @@ -314,3 +314,82 @@ async def _stub(message, history=None): body = res.json() assert body["reply"] == "Hello from the copilot." assert body["error"] is False + + +# --------------------------------------------------------------------------- # +# Notion Command Center source switch (increment 3) — the Mira replacement +# --------------------------------------------------------------------------- # +def _fake_notion_counts(monkeypatch, mapping): + from tools import notion_client + + monkeypatch.setattr( + notion_client, "fetch_status_counts", lambda db_key, **kw: mapping.get(db_key, {}) + ) + + +def test_snapshot_sources_from_notion_when_command_center_on(monkeypatch): + monkeypatch.setenv("NOTION_COMMAND_CENTER", "on") + _fake_notion_counts(monkeypatch, { + "delivery_tracker": {"Delivered to Site": 3, "Trim-Out": 2}, + "cs_survey": {"Contacted-Positive": 4, "Do Not Contact": 1}, + "service_warranty": {"Repair In Progress": 2}, + "title": {"Title Issued": 5, "MCO Received from Factory": 1}, + "collections": {"Current": 10, "Default": 1}, + "insurance": {"Policy Active": 7}, + }) + _wire_fake_data(monkeypatch, collections={ + "service_requests": [_FakeDoc({"status": "pending"})], # Firestore — should be IGNORED + "feedback": [_FakeDoc({"rating": 5})], + }) + + snap = asyncio.run(ops_copilot.get_business_snapshot()) + + # Installations now come from the Notion Delivery Tracker, not Firestore. + assert snap["installations"]["by_status"] == {"Delivered to Site": 3, "Trim-Out": 2} + # Feedback total derived from the CS-survey counts. 
+ assert snap["feedback"] == {"total": 5, "by_status": {"Contacted-Positive": 4, "Do Not Contact": 1}} + # Post-funding ops surface as count-by-status only. + ops = snap["operations"] + assert ops["title"]["by_status"] == {"Title Issued": 5, "MCO Received from Factory": 1} + assert ops["collections"]["by_status"] == {"Current": 10, "Default": 1} + assert ops["insurance"]["by_status"] == {"Policy Active": 7} + # PII-free: the whole snapshot is statuses + counts, no identities/emails/phones. + blob = repr(snap).lower() + assert "@" not in blob and "555-" not in blob + + +def test_snapshot_firestore_only_when_command_center_off(monkeypatch): + """Regression lock: flag off => Firestore feedback count, no 'operations' key.""" + monkeypatch.delenv("NOTION_COMMAND_CENTER", raising=False) + _wire_fake_data(monkeypatch, collections={ + "service_requests": [_FakeDoc({"status": "pending"})], + "feedback": [_FakeDoc({"rating": 5}), _FakeDoc({"rating": 4})], + }) + snap = asyncio.run(ops_copilot.get_business_snapshot()) + assert snap["installations"]["by_status"] == {"pending": 1} + assert snap["feedback"] == {"total": 2} + assert "operations" not in snap + + +def test_snapshot_degrades_when_notion_reader_raises(monkeypatch): + monkeypatch.setenv("NOTION_COMMAND_CENTER", "on") + from tools import notion_client + + def _boom(db_key, **kw): + raise RuntimeError("notion down") + + monkeypatch.setattr(notion_client, "fetch_status_counts", _boom) + _wire_fake_data(monkeypatch, collections={ + "service_requests": [_FakeDoc({"status": "pending"})], + "feedback": [_FakeDoc({"rating": 5})], + }) + snap = asyncio.run(ops_copilot.get_business_snapshot()) + # Notion raised -> falls back to Firestore for installations + feedback, ops degrade to {}. 
+ assert snap["installations"]["by_status"] == {"pending": 1} + assert snap["feedback"] == {"total": 1} + assert snap["operations"]["title"]["by_status"] == {} + + +def test_help_search_matches_delivery_status(): + hits = ops_copilot.search_platform_help("where is my home in the delivery process") + assert any("delivery" in h["title"].lower() for h in hits) diff --git a/tools/ops_copilot.py b/tools/ops_copilot.py index b86c754..a106ed5 100644 --- a/tools/ops_copilot.py +++ b/tools/ops_copilot.py @@ -140,6 +140,37 @@ def get_inventory_freshness() -> dict[str, Any]: return {"available": False} +def _command_center_enabled() -> bool: + """True when the operator flipped on the Notion Command Center source.""" + try: + from tools import notion_client + + return notion_client.is_command_center_enabled() + except Exception: # noqa: BLE001 — a config/import hiccup must not sink the snapshot + return False + + +def _notion_status_counts(db_key: str) -> dict[str, int]: + """PII-free count-by-status from a Notion Command Center DB ({} on any error).""" + try: + from tools import notion_client + + return notion_client.fetch_status_counts(db_key) + except Exception as exc: # noqa: BLE001 — graceful-degrade like every other section + logger.error("ops_copilot notion read failed", db_key=db_key, error=str(exc)) + return {} + + +def _firestore_feedback() -> dict[str, Any]: + """The Firestore feedback count — the default/fallback feedback section.""" + try: + feedback_docs = list(_get_db().collection("feedback").stream()) + return {"total": len(feedback_docs)} + except Exception as exc: + logger.error("ops_copilot feedback snapshot failed", error=str(exc)) + return {"error": "unavailable"} + + async def get_business_snapshot() -> dict[str, Any]: """Aggregate a compact, PII-free snapshot of the live business state. 
@@ -171,14 +202,29 @@ async def get_business_snapshot() -> dict[str, Any]: snapshot["inventory"] = {"by_status": _count_by_status("inventory")} snapshot["deals"] = {"by_status": _count_by_status("deals")} - snapshot["installations"] = {"by_status": _count_by_status("service_requests")} - - try: - feedback_docs = list(_get_db().collection("feedback").stream()) - snapshot["feedback"] = {"total": len(feedback_docs)} - except Exception as exc: - logger.error("ops_copilot feedback snapshot failed", error=str(exc)) - snapshot["feedback"] = {"error": "unavailable"} + # Installations / feedback / post-funding ops: the staff's system-of-record is + # the Notion Command Center. When NOTION_COMMAND_CENTER is on, source these + # PII-free counts from there (the functional Mira replacement — the Copilot can + # answer "where is my delivery / title / payment"); otherwise keep the exact + # Firestore behavior. One env var flips it; revert is instant. + if _command_center_enabled(): + install_counts = _notion_status_counts("delivery_tracker") + snapshot["installations"] = { + "by_status": install_counts or _count_by_status("service_requests") + } + fb_counts = _notion_status_counts("cs_survey") + snapshot["feedback"] = ( + {"total": sum(fb_counts.values()), "by_status": fb_counts} + if fb_counts + else _firestore_feedback() + ) + snapshot["operations"] = { + db_key: {"by_status": _notion_status_counts(db_key)} + for db_key in ("service_warranty", "title", "collections", "insurance") + } + else: + snapshot["installations"] = {"by_status": _count_by_status("service_requests")} + snapshot["feedback"] = _firestore_feedback() snapshot["inventory_freshness"] = get_inventory_freshness() @@ -268,6 +314,29 @@ async def get_business_snapshot() -> dict[str, Any]: "Ask me for the feedback count, or check the admin tools for the details." 
), }, + { + "title": "Where is my delivery / installation status", + "keywords": ["delivery", "deliver", "install", "installation", "set", "tied down", + "trim", "white glove", "where is", "status of", "phase"], + "body": ( + "Installation progress lives in the Notion Delivery Tracker, and the Copilot " + "reads it directly — ask 'how many homes are in trim-out?' or 'what's pending " + "delivery?' for a live count by stage. For one specific customer's home, open " + "the Delivery Tracker in Notion." + ), + }, + { + "title": "Title, collections, insurance, and service status", + "keywords": ["title", "tdhca", "collections", "payment", "past due", "insurance", + "escrow", "renewal", "service", "warranty", "bill back"], + "body": ( + "Title processing, collections, insurance/KIP, and service & warranty are " + "tracked in Notion. The Copilot can give a live count by status for any of them " + "— e.g. 'how many titles are pending?' or 'how many payments are past due?' — " + "without exposing customer details. Open the matching Notion database for the " + "per-customer specifics." + ), + }, { "title": "What changed: the website's new name", "keywords": ["website", "url", "address", "name", "moved", "domain", "email", "secure", "changed"], From b6c9e9aa6f9e610eda2babfdf8e9c5e0658667c8 Mon Sep 17 00:00:00 2001 From: arigatoexpress <95630102+arigatoexpress@users.noreply.github.com> Date: Mon, 15 Jun 2026 21:06:44 -0600 Subject: [PATCH 03/10] feat(notion): wire the real Command Center DB ids into config.yaml Activation now reduces to the NOTION_TOKEN secret + NOTION_COMMAND_CENTER=on -- the 7 ops-DB ids (Delivery Tracker, CS-survey, Service & Warranty, Title, Collections, Insurance, Lead Pipeline) live in config.yaml notion.databases.* (identifiers, not secrets; each still env-overridable via NOTION__DB_ID). Tests updated for config-then-env resolution. 36 green, ruff clean. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- config.yaml | 14 ++++++++++++++ tests/test_notion_client.py | 21 ++++++++++++--------- 2 files changed, 26 insertions(+), 9 deletions(-) diff --git a/config.yaml b/config.yaml index 93cdca8..9100ad5 100644 --- a/config.yaml +++ b/config.yaml @@ -176,3 +176,17 @@ scraper: price: ".product-price" image: "img" link: "a" + +# Notion "Command Center" integration. DB ids are identifiers, NOT secrets — the +# only secret is NOTION_TOKEN (Secret Manager). The Ops Copilot reads these +# PII-free (count-by-status) when NOTION_COMMAND_CENTER=on. Each id can also be +# overridden by env NOTION__DB_ID if you'd rather not commit them. +notion: + databases: + delivery_tracker: "34e6688d-bec6-81b7-966b-c7bfebeb7a27" # 🔧 Delivery Tracker (installations) + cs_survey: "34e6688d-bec6-818b-abd5-f6a095be6cb9" # 🤝 Customer satisfaction surveys + service_warranty: "34e6688d-bec6-8190-9a2c-dc0f5db19de6" # 🛠️ Service & Warranty + title: "34e6688d-bec6-81db-b2a6-ddf06dabe151" # 📋 Title processing + collections: "34e6688d-bec6-81c8-8430-ec5b1d2efd6d" # 💰 Collections + insurance: "34e6688d-bec6-8196-84a6-fc3c64ac2768" # Insurance / KIP + lead_pipeline: "34e6688d-bec6-81b3-b3a3-d176c0f7ce44" # 👤 Lead Pipeline ("up log") diff --git a/tests/test_notion_client.py b/tests/test_notion_client.py index 03c1d5a..fd74a3d 100644 --- a/tests/test_notion_client.py +++ b/tests/test_notion_client.py @@ -267,11 +267,13 @@ def test_command_center_disabled_by_default(monkeypatch): assert nc.is_command_center_enabled() is False -def test_db_id_resolves_env_for_command_center_keys(monkeypatch): - monkeypatch.setenv("NOTION_TITLE_DB_ID", "db-title") - monkeypatch.setenv("NOTION_COLLECTIONS_DB_ID", "db-coll") - assert nc._db_id("title") == "db-title" - assert nc._db_id("collections") == "db-coll" +def test_db_id_resolves_config_then_env(monkeypatch): + # Configured keys resolve from config.yaml notion.databases.* (real ids). 
+ assert nc._db_id("title").startswith("34e6688d") + assert nc._db_id("delivery_tracker").startswith("34e6688d") + # An unconfigured key (not in config.yaml) falls back to env. + monkeypatch.setenv("NOTION_TEAM_TASKS_DB_ID", "db-team") + assert nc._db_id("team_tasks") == "db-team" assert nc._db_id("unknown_key") == "" @@ -303,9 +305,10 @@ def test_fetch_status_counts_tolerates_dirty_and_aliased_status_columns(monkeypa def test_fetch_status_counts_empty_when_unconfigured(monkeypatch): + # Use a key NOT in config.yaml so resolution depends only on env/token. monkeypatch.delenv("NOTION_TOKEN", raising=False) - monkeypatch.setenv("NOTION_TITLE_DB_ID", "db-title") - assert nc.fetch_status_counts("title") == {} + monkeypatch.setenv("NOTION_TEAM_TASKS_DB_ID", "db-team") + assert nc.fetch_status_counts("team_tasks") == {} # no token monkeypatch.setenv("NOTION_TOKEN", "secret_test") - monkeypatch.delenv("NOTION_TITLE_DB_ID", raising=False) - assert nc.fetch_status_counts("title") == {} + monkeypatch.delenv("NOTION_TEAM_TASKS_DB_ID", raising=False) + assert nc.fetch_status_counts("team_tasks") == {} # no db id (not in config or env) From 96773b2675ff5efcd701acb7b3956ae7fe933dd5 Mon Sep 17 00:00:00 2001 From: arigatoexpress <95630102+arigatoexpress@users.noreply.github.com> Date: Mon, 15 Jun 2026 21:07:40 -0600 Subject: [PATCH 04/10] docs(gcp): staged activation runbook for the inventory + Notion bridge Audited prod state + the exact, ordered, copy-paste gcloud commands for the gated activation (inventory read-flip, Notion token secret + IAM + flag), with the --update-* (never --set-*) safety rule and the PII/no-auto-trade notes. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- docs/INTEGRATION_GCP_ACTIVATION.md | 105 +++++++++++++++++++++++++++++ 1 file changed, 105 insertions(+) create mode 100644 docs/INTEGRATION_GCP_ACTIVATION.md diff --git a/docs/INTEGRATION_GCP_ACTIVATION.md b/docs/INTEGRATION_GCP_ACTIVATION.md new file mode 100644 index 0000000..08aff87 --- /dev/null +++ b/docs/INTEGRATION_GCP_ACTIVATION.md @@ -0,0 +1,105 @@ +# THO Integration — GCP Activation Runbook + +Staged, audited commands to take the inventory + Notion-bridge integration live. +Every step here is a **gated production action** (it deploys/configures the live +client app), so it is yours to run — the code is built, tested, and reversible; +this just makes the activation push-button. + +> **Audited prod state (2026-06-16):** service `project-go-forward` @ rev `00279`, +> project `tho-ai-agent`, region `us-central1`, run-as SA +> `691674245427-compute@developer.gserviceaccount.com`, Firestore `(default)` +> NATIVE us-central1. **All integration env vars are currently unset** +> (`INVENTORY_SOURCE`, `NOTION_TOKEN`, `NOTION_COMMAND_CENTER`, …) → the feature +> is fully dark; nothing changes until you run the steps below. + +> ⚠️ **Golden rule for Cloud Run config:** always use `--update-env-vars` / +> `--update-secrets` (additive). **Never** `--set-env-vars` / `--set-secrets` — +> that would wipe the 16 existing env vars + 7 secret mounts (WEBAUTHN_*, RESEND, +> admin PIN/session, partner keys) and break prod. + +--- + +## Part A — Inventory unfreeze (branch `feat/in-app-inventory`) + +1. **Rebase + merge.** That branch was cut from main `6098a91` (#192); main is now + ≥ #198. Rebase onto current main (changes are additive — new files + targeted + edits, should be clean), then merge the PR. Default `INVENTORY_SOURCE=legacy` + → **the merge deploys with zero behavior change.** + +2. 
**Seed Firestore from the real source.** Put `House Orders.xlsx` in `data/`, + preview, then write (uses Application Default Credentials with Firestore access + to `tho-ai-agent`): + ```bash + python -m tools.house_orders_sync # dry-run: eyeball the homes + AVAILABLE/SOLD split + python -m tools.house_orders_sync --apply # upsert into Firestore `inventory` (idempotent) + ``` + (Fallback if you don't have the sheet handy: `python -m tools.inventory_seed --apply` + seeds the 279-home May-11 snapshot instead.) + +3. **Flip the public read onto the in-app store** (gated deploy → new revision): + ```bash + gcloud run services update project-go-forward --region=us-central1 --project=tho-ai-agent \ + --update-env-vars INVENTORY_SOURCE=firestore + ``` + The site now serves Mark's current stock. **Instant revert** (no redeploy delay, + just a new revision): + ```bash + gcloud run services update project-go-forward --region=us-central1 --project=tho-ai-agent \ + --update-env-vars INVENTORY_SOURCE=legacy + ``` + +--- + +## Part B — Notion bridge / Ops Copilot (branch `feat/notion-ops-bridge`) + +The 7 ops-DB ids are already in `config.yaml`; the only secret is the token. + +1. **Store the token** (you supply the value — never paste it in chat/AI): + ```bash + printf '%s' 'ntn_YOUR_TOKEN_HERE' | gcloud secrets create notion-token \ + --data-file=- --project=tho-ai-agent + ``` + +2. **Let the app read it:** + ```bash + gcloud secrets add-iam-policy-binding notion-token \ + --member='serviceAccount:691674245427-compute@developer.gserviceaccount.com' \ + --role=roles/secretmanager.secretAccessor --project=tho-ai-agent + ``` + +3. **Share the 7 databases with the integration** (Notion UI, once each): on the + hub page (or each DB) → `•••` → **Connections** → add your integration. The + ids the app uses: Delivery Tracker, Customer-satisfaction surveys, Service & + Warranty, Title processing, Collections, Insurance/KIP, Lead Pipeline. + +4. 
**Merge `feat/notion-ops-bridge`**, then wire + flip on (gated deploy): + ```bash + gcloud run services update project-go-forward --region=us-central1 --project=tho-ai-agent \ + --update-secrets NOTION_TOKEN=notion-token:latest \ + --update-env-vars NOTION_COMMAND_CENTER=on + ``` + +5. **Verify:** ask the Ops Copilot *"how many titles are pending?"* or *"how many + homes are in trim-out?"* — it answers from the staff's live Notion, **count-by- + status only, no customer PII**. **Instant revert:** `--update-env-vars NOTION_COMMAND_CENTER=off` + (the snapshot silently returns to Firestore counts). + +--- + +## Part C — deferred GCP automation (built later, gated) + +- **Daily House Orders sync** — Cloud Scheduler → an admin sync endpoint → + `house_orders_sync` → Firestore, so the site refreshes without a manual run. + (Needs a sheet→GCS drop or Drive-API access for the run-as SA.) +- **Mira-secret cleanup** — after the Ops Copilot bridge proves parity in prod, + retire `tho-api-key-mira` + `telegram-asfao-token` and delete the Mira routers + (`mira_routes.py`, `mira_notify.py`, `github_mira_trigger.py`). + +## Security notes +- `NOTION_TOKEN` lives only in Secret Manager; DB ids are config (not secrets). +- The bridge is **PII-free by construction** — count-by-status only; no customer + identity, dollar figure, or free text ever enters the snapshot. +- Every activation is a single env var with an instant, no-code revert. +- The two existing scheduler loops (`brain-ooda-loop`, `asfao-decide-loop`) were + audited: both write analysis/**proposed** decisions to BigQuery — neither + executes trades. The no-auto-execution boundary holds. 
From 212535d146767f4fae5684af6892c048d3d5c8bf Mon Sep 17 00:00:00 2001 From: arigatoexpress <95630102+arigatoexpress@users.noreply.github.com> Date: Mon, 15 Jun 2026 21:10:00 -0600 Subject: [PATCH 05/10] docs(gcp): runbook reflects the actual 'notion-id' secret + SA grant done Co-Authored-By: Claude Opus 4.8 (1M context) --- docs/INTEGRATION_GCP_ACTIVATION.md | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/docs/INTEGRATION_GCP_ACTIVATION.md b/docs/INTEGRATION_GCP_ACTIVATION.md index 08aff87..d70ab67 100644 --- a/docs/INTEGRATION_GCP_ACTIVATION.md +++ b/docs/INTEGRATION_GCP_ACTIVATION.md @@ -54,18 +54,13 @@ this just makes the activation push-button. The 7 ops-DB ids are already in `config.yaml`; the only secret is the token. -1. **Store the token** (you supply the value — never paste it in chat/AI): - ```bash - printf '%s' 'ntn_YOUR_TOKEN_HERE' | gcloud secrets create notion-token \ - --data-file=- --project=tho-ai-agent - ``` +1. **Token secret — ✅ DONE.** Stored as Secret Manager secret **`notion-id`** + (version 1 enabled). To rotate later: + `printf '%s' 'ntn_NEW' | gcloud secrets versions add notion-id --data-file=- --project=tho-ai-agent`. -2. **Let the app read it:** - ```bash - gcloud secrets add-iam-policy-binding notion-token \ - --member='serviceAccount:691674245427-compute@developer.gserviceaccount.com' \ - --role=roles/secretmanager.secretAccessor --project=tho-ai-agent - ``` +2. **SA read access — ✅ DONE.** The run-as SA + `691674245427-compute@developer.gserviceaccount.com` was granted + `roles/secretmanager.secretAccessor` on `notion-id` (so the deploy below can bind it). 3. **Share the 7 databases with the integration** (Notion UI, once each): on the hub page (or each DB) → `•••` → **Connections** → add your integration. The @@ -75,7 +70,7 @@ The 7 ops-DB ids are already in `config.yaml`; the only secret is the token. 4. 
**Merge `feat/notion-ops-bridge`**, then wire + flip on (gated deploy): ```bash gcloud run services update project-go-forward --region=us-central1 --project=tho-ai-agent \ - --update-secrets NOTION_TOKEN=notion-token:latest \ + --update-secrets NOTION_TOKEN=notion-id:latest \ --update-env-vars NOTION_COMMAND_CENTER=on ``` From 18d0f36deef946c677d575ef8c5b98a891df5800 Mon Sep 17 00:00:00 2001 From: arigatoexpress <95630102+arigatoexpress@users.noreply.github.com> Date: Mon, 15 Jun 2026 21:19:03 -0600 Subject: [PATCH 06/10] feat(notion): mirror website leads into the Notion Lead Pipeline (gated, PII-safe) Integration increment 4. After a website contact-form lead is saved to Firestore (system of record), optionally mirror it into the staff's Notion Lead Pipeline so salespeople see web leads in the system they work from. Server-side, gated behind NOTION_LEAD_SYNC (default off), fire-and-forget (never blocks the visitor). - tools/notion_lead_writer.py: schema-exact write to the LIVE Lead Pipeline -- Customer Name / 'Email ' (trailing space!) / Phone Number / Pipeline Stage= New Lead / Notes (source+date stamp); deliberately AVOIDS the two dirty duplicate phone columns and never writes 'SSN / Tax ID'. Reuses the NOTION_TOKEN http core; db id from config.yaml. - main.py POST /api/contact: fire-and-forget call after create_lead; failure -> 'notion_lead_sync_failed' warning, visitor still gets success. Tests: 8 (off-by-default, exact columns incl. trailing-space Email, avoids dirty/SSN columns, omit-when-absent, posts to /pages, swallows errors). ruff clean. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- main.py | 19 +++++ tests/test_notion_lead_writer.py | 124 +++++++++++++++++++++++++++++++ tools/notion_lead_writer.py | 98 ++++++++++++++++++++++++ 3 files changed, 241 insertions(+) create mode 100644 tests/test_notion_lead_writer.py create mode 100644 tools/notion_lead_writer.py diff --git a/main.py b/main.py index b8e6765..04d5b4b 100644 --- a/main.py +++ b/main.py @@ -4694,6 +4694,25 @@ async def submit_contact_form(request: Request): lead_id=lead_id, ) + # Mirror the lead into the Notion Lead Pipeline the salespeople work from + # (server-side, gated by NOTION_LEAD_SYNC, fire-and-forget). PII goes only + # to the staff's own CRM that already holds contact info; this is never + # reachable from a public/partner route and never blocks the visitor. + try: + from tools.notion_lead_writer import is_lead_sync_enabled, write_lead + + if is_lead_sync_enabled() and not write_lead( + name, + email=email or None, + phone=phone, + message=data.get("message"), + source=data.get("source", "contact_form"), + ): + warnings.append("notion_lead_sync_failed") + except Exception as e: + warnings.append("notion_lead_sync_failed") + struct_logger.warning("Notion lead sync failed", error=str(e)) + # Send welcome email if email provided if email: try: diff --git a/tests/test_notion_lead_writer.py b/tests/test_notion_lead_writer.py new file mode 100644 index 0000000..7ee3b63 --- /dev/null +++ b/tests/test_notion_lead_writer.py @@ -0,0 +1,124 @@ +"""Tests for the website-lead -> Notion Lead Pipeline writer. + +This is the one piece of the integration that WRITES customer PII to a live +system, so the tests pin: it's off by default, it targets the exact live schema +(incl. the trailing-space "Email " column), it avoids the dirty duplicate phone +columns, it never writes SSN, and any failure is swallowed (fire-and-forget). +HTTP is fully mocked — nothing leaves the process. 
+""" + +from __future__ import annotations + +import sys +from pathlib import Path + +import httpx +import pytest + +REPO_ROOT = Path(__file__).resolve().parent.parent +if str(REPO_ROOT) not in sys.path: + sys.path.insert(0, str(REPO_ROOT)) + +from tools import notion_lead_writer as nlw # noqa: E402 + + +class _FakeResponse: + def __init__(self, status_code=200): + self.status_code = status_code + + def raise_for_status(self): + if self.status_code >= 400: + raise httpx.HTTPStatusError("error", request=None, response=None) + + +def _capture_post(monkeypatch, *, raises=None, status=200): + captured = {} + + def fake_post(url, headers=None, json=None, timeout=None): + captured["url"] = url + captured["headers"] = headers + captured["json"] = json + if raises is not None: + raise raises + return _FakeResponse(status) + + monkeypatch.setattr(nlw.httpx, "post", fake_post) + return captured + + +@pytest.fixture +def enabled(monkeypatch): + monkeypatch.setenv("NOTION_LEAD_SYNC", "on") + monkeypatch.setenv("NOTION_TOKEN", "secret_test") + # lead_pipeline db id comes from config.yaml notion.databases.lead_pipeline + + +# --- gating ------------------------------------------------------------------- + +def test_disabled_by_default(monkeypatch): + monkeypatch.delenv("NOTION_LEAD_SYNC", raising=False) + monkeypatch.setenv("NOTION_TOKEN", "secret_test") + assert nlw.is_lead_sync_enabled() is False + + +def test_enabled_requires_flag_and_token(monkeypatch): + monkeypatch.setenv("NOTION_LEAD_SYNC", "on") + monkeypatch.delenv("NOTION_TOKEN", raising=False) + assert nlw.is_lead_sync_enabled() is False # flag on but no token + monkeypatch.setenv("NOTION_TOKEN", "secret_test") + assert nlw.is_lead_sync_enabled() is True # config.yaml supplies the db id + + +def test_write_lead_is_noop_when_disabled(monkeypatch): + monkeypatch.delenv("NOTION_LEAD_SYNC", raising=False) + captured = _capture_post(monkeypatch) + assert nlw.write_lead("Jane Buyer", phone="555-111-2222") is False + assert 
captured == {} # no HTTP call at all + + +# --- schema-exact, PII-safe payload ------------------------------------------ + +def test_payload_targets_exact_live_columns(): + props = nlw.build_lead_properties( + "Jane Buyer", email="jane@example.com", phone="555-111-2222", + message="Interested in the Oak 28x56", source="contact_form", + ) + assert props["Customer Name"]["title"][0]["text"]["content"] == "Jane Buyer" + # "Email " has a trailing space in the live schema — must match exactly. + assert "Email " in props and props["Email "] == {"email": "jane@example.com"} + assert props["Phone Number"] == {"phone_number": "555-111-2222"} + assert props["Pipeline Stage"] == {"select": {"name": "New Lead"}} + notes = props["Notes"]["rich_text"][0]["text"]["content"] + assert "Website lead" in notes and "source=contact_form" in notes + assert "Interested in the Oak 28x56" in notes + + +def test_payload_avoids_dirty_and_sensitive_columns(): + props = nlw.build_lead_properties("Jane", email="j@x.com", phone="5551112222") + # The two dirty duplicate phone columns and SSN must NEVER be written. 
+ assert "Phone number " not in props # text duplicate + assert "phone number " not in props # date-typed duplicate + assert "SSN / Tax ID" not in props + assert "Email" not in props # only the trailing-space "Email " exists + + +def test_omits_email_and_phone_when_absent(): + props = nlw.build_lead_properties("Walk-in", email=None, phone=None) + assert "Email " not in props and "Phone Number" not in props + assert props["Customer Name"]["title"][0]["text"]["content"] == "Walk-in" + + +# --- write + fire-and-forget -------------------------------------------------- + +def test_write_lead_posts_to_pages_and_returns_true(enabled, monkeypatch): + captured = _capture_post(monkeypatch, status=200) + assert nlw.write_lead("Jane", email="j@x.com", phone="5551112222") is True + assert captured["url"].endswith("/pages") + assert captured["headers"]["Authorization"] == "Bearer secret_test" + # parent db id resolved from config.yaml (real Lead Pipeline id) + assert captured["json"]["parent"]["database_id"].startswith("34e6688d") + + +def test_write_lead_swallows_errors_and_returns_false(enabled, monkeypatch): + _capture_post(monkeypatch, raises=httpx.ConnectError("boom")) + assert nlw.write_lead("Jane", phone="5551112222") is False # never raises diff --git a/tools/notion_lead_writer.py b/tools/notion_lead_writer.py new file mode 100644 index 0000000..8ceb1f3 --- /dev/null +++ b/tools/notion_lead_writer.py @@ -0,0 +1,98 @@ +"""Mirror a website-captured lead into the Notion Lead Pipeline (the staff CRM). + +Server-side only — called from ``POST /api/contact`` AFTER the lead is saved to +Firestore (the system of record). It is never reachable from a public/partner +route or the browser. The Lead Pipeline is the salespeople's own contact CRM +(it has Email/Phone columns), so mirroring name/email/phone there is appropriate +— this is the only PII the integration writes, and only into the client's own +internal system, not anywhere public. 
+ +Safety properties: +- **Gated.** No-op unless ``NOTION_LEAD_SYNC`` is on AND the token + Lead Pipeline + db id are configured. Default OFF. +- **Fire-and-forget.** Any failure logs a warning and is swallowed so it never + affects the visitor's response (mirrors the existing ``lead_storage_failed``). +- **Schema-exact.** Targets the live property names inspected 2026-06-16 — incl. + the trailing-space ``"Email "`` column — and writes only the clean + ``"Phone Number"`` (phone_number), deliberately avoiding the two dirty + duplicate phone columns (one text, one mistyped as a date). It does NOT write + to ``"SSN / Tax ID"`` or any other sensitive column. +""" + +from __future__ import annotations + +import logging +import os +from datetime import UTC, datetime + +import httpx + +from structured_logging import logger as struct_logger +from tools.notion_client import _NOTION_API, _NOTION_VERSION, _TIMEOUT_SECONDS, _db_id, _token + +log = logging.getLogger(__name__) + +# EXACT live Lead Pipeline property names. "Email " has a real trailing space; +# we write only the clean phone_number column and skip the dirty duplicates. 
+_P_NAME = "Customer Name" # title +_P_EMAIL = "Email " # email — trailing space is intentional (live schema) +_P_PHONE = "Phone Number" # phone_number (NOT "Phone number " / "phone number ") +_P_STAGE = "Pipeline Stage" # select +_P_NOTES = "Notes" # rich_text — holds the source/date stamp + message +_NEW_LEAD_STAGE = "New Lead" + + +def is_lead_sync_enabled() -> bool: + """True only when NOTION_LEAD_SYNC is on AND token + Lead Pipeline db id exist.""" + flag = (os.getenv("NOTION_LEAD_SYNC", "off") or "off").strip().lower() in { + "on", "true", "1", "yes", + } + return flag and bool(_token() and _db_id("lead_pipeline")) + + +def build_lead_properties(name, *, email=None, phone=None, message=None, source="website") -> dict: + """Build the Notion ``properties`` payload (pure; unit-tested for PII-safety).""" + stamp = f"Website lead · {datetime.now(UTC).date().isoformat()} · source={source}" + note = stamp if not message else f"{stamp}\n\n{str(message)[:1500]}" + props: dict = { + _P_NAME: {"title": [{"text": {"content": (name or "Website Lead")[:200]}}]}, + _P_STAGE: {"select": {"name": _NEW_LEAD_STAGE}}, + _P_NOTES: {"rich_text": [{"text": {"content": note}}]}, + } + if email: + props[_P_EMAIL] = {"email": str(email)[:200]} + if phone: + props[_P_PHONE] = {"phone_number": str(phone)[:50]} + return props + + +def write_lead(name, *, email=None, phone=None, message=None, source="website") -> bool: + """Create a Lead Pipeline row for a website lead. Never raises. + + Returns True on a 2xx create, False if disabled or on any error (logged as + ``notion_lead_sync_failed`` so a Cloud Run log alert can fire). 
+ """ + if not is_lead_sync_enabled(): + return False + payload = { + "parent": {"database_id": _db_id("lead_pipeline")}, + "properties": build_lead_properties( + name, email=email, phone=phone, message=message, source=source + ), + } + headers = { + "Authorization": f"Bearer {_token()}", + "Notion-Version": _NOTION_VERSION, + "Content-Type": "application/json", + } + try: + resp = httpx.post( + f"{_NOTION_API}/pages", headers=headers, json=payload, timeout=_TIMEOUT_SECONDS + ) + resp.raise_for_status() + return True + except Exception as e: # noqa: BLE001 — fire-and-forget; must never reach the visitor + struct_logger.warning( + "notion lead sync failed", event="notion_lead_sync_failed", error=str(e) + ) + return False From 35fbfafb03aa64942564ded0ce7f1b1dab4dc449 Mon Sep 17 00:00:00 2001 From: arigatoexpress <95630102+arigatoexpress@users.noreply.github.com> Date: Mon, 15 Jun 2026 21:38:45 -0600 Subject: [PATCH 07/10] fix(notion): paginate Command Center queries (full count, not capped at 100) All-hands audit: _query_database sent a single page (<=100 rows) with no has_more/next_cursor loop, so fetch_status_counts silently undercounted any DB over 100 rows -> a confidently-wrong staff answer once the flag flips. Now it follows Notion's cursor up to _MAX_QUERY_PAGES (20 = 2000 rows), bounded by the requested limit, preserving the 8s/never-raise contract (returns partial on error). fetch_status_counts default limit raised to 2000. +1 two-page test. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- tests/test_notion_client.py | 21 ++++++++++++++++ tools/notion_client.py | 49 ++++++++++++++++++++++++++----------- 2 files changed, 56 insertions(+), 14 deletions(-) diff --git a/tests/test_notion_client.py b/tests/test_notion_client.py index fd74a3d..d0bb751 100644 --- a/tests/test_notion_client.py +++ b/tests/test_notion_client.py @@ -312,3 +312,24 @@ def test_fetch_status_counts_empty_when_unconfigured(monkeypatch): monkeypatch.setenv("NOTION_TOKEN", "secret_test") monkeypatch.delenv("NOTION_TEAM_TASKS_DB_ID", raising=False) assert nc.fetch_status_counts("team_tasks") == {} # no db id (not in config or env) + + +def test_query_database_paginates_across_has_more(monkeypatch): + """fetch_status_counts must count across pages, not silently cap at 100.""" + monkeypatch.setenv("NOTION_TOKEN", "secret_test") + pages = [ + {"results": [_ops_page("A"), _ops_page("A")], "has_more": True, "next_cursor": "cur2"}, + {"results": [_ops_page("B"), _ops_page("A")], "has_more": False, "next_cursor": None}, + ] + state = {"i": 0, "cursors": []} + + def fake_post(url, headers=None, json=None, timeout=None): + state["cursors"].append(json.get("start_cursor")) + page = pages[state["i"]] + state["i"] += 1 + return FakeResponse(page) + + monkeypatch.setattr(nc.httpx, "post", fake_post) + counts = nc.fetch_status_counts("title") # db id from config.yaml + assert counts == {"A": 3, "B": 1} # summed across both pages + assert state["cursors"] == [None, "cur2"] # page 2 used next_cursor diff --git a/tools/notion_client.py b/tools/notion_client.py index 6b4faf5..a4606a9 100644 --- a/tools/notion_client.py +++ b/tools/notion_client.py @@ -212,10 +212,19 @@ def _normalize_created_at(raw: Any, fallback: str | None) -> str: # --------------------------------------------------------------------------- # HTTP # --------------------------------------------------------------------------- +# A query that wants a full count is bounded by this many 
pages (x100 rows) so +# it respects the per-request timeout and the never-raise contract even on a +# huge database. +_MAX_QUERY_PAGES = 20 + + def _query_database(db_id: str, limit: int) -> list[dict[str, Any]]: - """POST /databases/{db_id}/query and return the raw ``results`` list. + """POST /databases/{db_id}/query and return up to ``limit`` rows, paginating. - Never raises: on any error logs and returns ``[]``. + Follows Notion's ``has_more``/``next_cursor`` so callers that need a full + count (``fetch_status_counts``) are not silently capped at 100. Bounded by + ``limit`` and ``_MAX_QUERY_PAGES``. Never raises: on any error logs and + returns whatever was collected so far. """ token = _token() if not token or not db_id: @@ -226,18 +235,30 @@ def _query_database(db_id: str, limit: int) -> list[dict[str, Any]]: "Notion-Version": _NOTION_VERSION, "Content-Type": "application/json", } - # Notion caps page_size at 100; clamp the requested limit into [1, 100]. - page_size = max(1, min(int(limit), 100)) - payload = {"page_size": page_size} - try: - resp = httpx.post(url, headers=headers, json=payload, timeout=_TIMEOUT_SECONDS) - resp.raise_for_status() - body = resp.json() + want = max(1, int(limit)) + out: list[dict[str, Any]] = [] + cursor: str | None = None + for _ in range(_MAX_QUERY_PAGES): + # Notion caps page_size at 100; never ask for more than we still want. 
+ payload: dict[str, Any] = {"page_size": max(1, min(want - len(out), 100))} + if cursor: + payload["start_cursor"] = cursor + try: + resp = httpx.post(url, headers=headers, json=payload, timeout=_TIMEOUT_SECONDS) + resp.raise_for_status() + body = resp.json() + except Exception as e: # noqa: BLE001 — graceful degradation by contract + struct_logger.error("notion query failed", db_id=db_id, error=str(e)) + return out results = body.get("results", []) - return results if isinstance(results, list) else [] - except Exception as e: # noqa: BLE001 — graceful degradation by contract - struct_logger.error("notion query failed", db_id=db_id, error=str(e)) - return [] + if isinstance(results, list): + out.extend(results) + if len(out) >= want or not body.get("has_more"): + break + cursor = body.get("next_cursor") + if not cursor: + break + return out[:want] # --------------------------------------------------------------------------- @@ -339,7 +360,7 @@ def fetch_feedback(limit: int = 50) -> list[dict[str, Any]]: def fetch_status_counts( - db_key: str, *, limit: int = 100, status_aliases: tuple[str, ...] = _STATUS_ALIASES + db_key: str, *, limit: int = 2000, status_aliases: tuple[str, ...] = _STATUS_ALIASES ) -> dict[str, int]: """Count rows by their status/phase in a Command Center DB. PII-free by construction. From 9744e8b200f0d7cce6a1f4fd5961ef060e59de22 Mon Sep 17 00:00:00 2001 From: arigatoexpress <95630102+arigatoexpress@users.noreply.github.com> Date: Mon, 15 Jun 2026 21:40:23 -0600 Subject: [PATCH 08/10] fix(contact): keep Notion lead-sync failures server-side (no public disclosure) All-hands audit (low): /api/contact echoed 'notion_lead_sync_failed' in the public warnings[] returned to anonymous visitors, disclosing that leads are mirrored to Notion. Now logged server-side only (event field preserved for a Cloud Run log alert); not surfaced externally. The integration stays invisible. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- main.py | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/main.py b/main.py index 04d5b4b..e136620 100644 --- a/main.py +++ b/main.py @@ -4701,17 +4701,21 @@ async def submit_contact_form(request: Request): try: from tools.notion_lead_writer import is_lead_sync_enabled, write_lead - if is_lead_sync_enabled() and not write_lead( - name, - email=email or None, - phone=phone, - message=data.get("message"), - source=data.get("source", "contact_form"), - ): - warnings.append("notion_lead_sync_failed") + if is_lead_sync_enabled(): + # write_lead is fire-and-forget and logs notion_lead_sync_failed + # itself on failure. Do NOT surface that to the anonymous visitor + # in `warnings` — keep the integration invisible externally. + write_lead( + name, + email=email or None, + phone=phone, + message=data.get("message"), + source=data.get("source", "contact_form"), + ) except Exception as e: - warnings.append("notion_lead_sync_failed") - struct_logger.warning("Notion lead sync failed", error=str(e)) + struct_logger.warning( + "Notion lead sync failed", event="notion_lead_sync_failed", error=str(e) + ) # Send welcome email if email provided if email: From 081c06a4650d8be2e984fc5a98e773971801d307 Mon Sep 17 00:00:00 2001 From: arigatoexpress <95630102+arigatoexpress@users.noreply.github.com> Date: Mon, 15 Jun 2026 21:43:20 -0600 Subject: [PATCH 09/10] feat(notion): status-label PII guard + lead_id join stamp - fetch_status_counts: collapse a status VALUE that is overlong or matches phone/email/SSN to 'OTHER', so operator free-text can't ride the counts-only channel into the LLM prompt (all-hands optional hardening). - notion_lead_writer + /api/contact: stamp the Firestore lead_id into the Lead Pipeline row's Notes as 'ref=' -- the cross-system join key (no dedicated 'Lead ID' field exists yet; app stays SoR for the raw lead). +2 tests. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- main.py | 1 + tests/test_notion_client.py | 7 +++++++ tests/test_notion_lead_writer.py | 6 ++++++ tools/notion_client.py | 19 ++++++++++++++++++- tools/notion_lead_writer.py | 12 +++++++++--- 5 files changed, 41 insertions(+), 4 deletions(-) diff --git a/main.py b/main.py index e136620..16a846d 100644 --- a/main.py +++ b/main.py @@ -4711,6 +4711,7 @@ async def submit_contact_form(request: Request): phone=phone, message=data.get("message"), source=data.get("source", "contact_form"), + lead_id=lead_id, ) except Exception as e: struct_logger.warning( diff --git a/tests/test_notion_client.py b/tests/test_notion_client.py index d0bb751..4b69bd2 100644 --- a/tests/test_notion_client.py +++ b/tests/test_notion_client.py @@ -333,3 +333,10 @@ def fake_post(url, headers=None, json=None, timeout=None): counts = nc.fetch_status_counts("title") # db id from config.yaml assert counts == {"A": 3, "B": 1} # summed across both pages assert state["cursors"] == [None, "cur2"] # page 2 used next_cursor + + +def test_status_label_guard_collapses_pii_like_values(): + assert nc._safe_status_label("Title Issued") == "Title Issued" # enum-like: kept + assert nc._safe_status_label("Call John Smith 555-123-4567") == "OTHER" # phone -> OTHER + assert nc._safe_status_label("email me at j@x.com") == "OTHER" # email -> OTHER + assert nc._safe_status_label("x" * 80) == "OTHER" # overlong -> OTHER diff --git a/tests/test_notion_lead_writer.py b/tests/test_notion_lead_writer.py index 7ee3b63..67ba06a 100644 --- a/tests/test_notion_lead_writer.py +++ b/tests/test_notion_lead_writer.py @@ -122,3 +122,9 @@ def test_write_lead_posts_to_pages_and_returns_true(enabled, monkeypatch): def test_write_lead_swallows_errors_and_returns_false(enabled, monkeypatch): _capture_post(monkeypatch, raises=httpx.ConnectError("boom")) assert nlw.write_lead("Jane", phone="5551112222") is False # never raises + + +def test_lead_id_is_stamped_into_notes_as_join_ref(): + 
props = nlw.build_lead_properties("Jane", source="contact_form", lead_id="contact_123_ab") + notes = props["Notes"]["rich_text"][0]["text"]["content"] + assert "ref=contact_123_ab" in notes # Firestore lead_id = the cross-system join key diff --git a/tools/notion_client.py b/tools/notion_client.py index a4606a9..a86accb 100644 --- a/tools/notion_client.py +++ b/tools/notion_client.py @@ -37,6 +37,7 @@ from __future__ import annotations import os +import re from datetime import UTC, datetime from typing import Any @@ -358,6 +359,22 @@ def fetch_feedback(limit: int = 50) -> list[dict[str, Any]]: "Outreach Stage", ) +# Status VALUES should be enum-like labels. If an operator types free text with +# embedded PII (e.g. a phase literally named "Call John Smith 555-123-4567"), that +# string would otherwise become a count key and ride the "counts-only" channel +# into the Gemini prompt. Defense-in-depth: collapse such values to "OTHER". +_PII_IN_LABEL = re.compile( + r"\d{3}-\d{2}-\d{4}" # SSN + r"|[\w.+-]+@[\w-]+\.\w{2,}" # email + r"|(?:\+?\d[\s.\-()]?){9,}\d" # long digit run (phone) +) + + +def _safe_status_label(value: object) -> str: + """Enum-like status label, or 'OTHER' if it's overlong or looks like PII.""" + s = str(value) + return "OTHER" if len(s) > 60 or _PII_IN_LABEL.search(s) else s + def fetch_status_counts( db_key: str, *, limit: int = 2000, status_aliases: tuple[str, ...] 
= _STATUS_ALIASES @@ -381,6 +398,6 @@ def fetch_status_counts( if not isinstance(page, dict): continue status = _find_prop(page.get("properties", {}), *status_aliases) or "UNKNOWN" - key = str(status) + key = _safe_status_label(status) counts[key] = counts.get(key, 0) + 1 return counts diff --git a/tools/notion_lead_writer.py b/tools/notion_lead_writer.py index 8ceb1f3..5a275bb 100644 --- a/tools/notion_lead_writer.py +++ b/tools/notion_lead_writer.py @@ -50,9 +50,15 @@ def is_lead_sync_enabled() -> bool: return flag and bool(_token() and _db_id("lead_pipeline")) -def build_lead_properties(name, *, email=None, phone=None, message=None, source="website") -> dict: +def build_lead_properties( + name, *, email=None, phone=None, message=None, source="website", lead_id=None +) -> dict: """Build the Notion ``properties`` payload (pure; unit-tested for PII-safety).""" stamp = f"Website lead · {datetime.now(UTC).date().isoformat()} · source={source}" + if lead_id: + # The Firestore lead_id is the cross-system join key (app stays SoR for + # the raw lead); stamping it makes the Notion row traceable back. + stamp += f" · ref={lead_id}" note = stamp if not message else f"{stamp}\n\n{str(message)[:1500]}" props: dict = { _P_NAME: {"title": [{"text": {"content": (name or "Website Lead")[:200]}}]}, @@ -66,7 +72,7 @@ def build_lead_properties(name, *, email=None, phone=None, message=None, source= return props -def write_lead(name, *, email=None, phone=None, message=None, source="website") -> bool: +def write_lead(name, *, email=None, phone=None, message=None, source="website", lead_id=None) -> bool: """Create a Lead Pipeline row for a website lead. Never raises. 
Returns True on a 2xx create, False if disabled or on any error (logged as @@ -77,7 +83,7 @@ def write_lead(name, *, email=None, phone=None, message=None, source="website") payload = { "parent": {"database_id": _db_id("lead_pipeline")}, "properties": build_lead_properties( - name, email=email, phone=phone, message=message, source=source + name, email=email, phone=phone, message=message, source=source, lead_id=lead_id ), } headers = { From f0f0b01e7e6f631535f23a9f9490b4f3f18f2578 Mon Sep 17 00:00:00 2001 From: arigatoexpress <95630102+arigatoexpress@users.noreply.github.com> Date: Mon, 15 Jun 2026 21:44:47 -0600 Subject: [PATCH 10/10] feat(ops): GET /api/admin/ops-snapshot -- read-only PII-free Ops dashboard data Next-level (high value): the in-app, GCP-native read surface that replaces the Telegram/Mira status pushes. Admin-gated, read-only, returns get_business_snapshot() -- counts only (leads/appointments/inventory/deals/installations/feedback + the Notion Command Center ops counts when NOTION_COMMAND_CENTER=on), never PII/dollars, never 500s. +2 tests (gate + shape). Co-Authored-By: Claude Opus 4.8 (1M context) --- main.py | 20 ++++++++++++++++++++ tests/test_api_v1.py | 21 +++++++++++++++++++++ 2 files changed, 41 insertions(+) diff --git a/main.py b/main.py index 16a846d..9464df3 100644 --- a/main.py +++ b/main.py @@ -6881,6 +6881,26 @@ def _seo_public_homes() -> list: from schemas.copilot_schemas import CopilotRequest +@app.get("/api/admin/ops-snapshot", dependencies=[Depends(require_admin)]) +async def admin_ops_snapshot(): + """Live, PII-free business snapshot for the admin Ops dashboard. + + Read-only and admin-gated. Aggregates COUNTS only (leads/appointments/ + inventory/deals/installations/feedback, plus the Notion Command Center ops + counts — delivery/title/collections/insurance — when NOTION_COMMAND_CENTER is + on). Never any customer identity or dollar figure. Each section is + fault-isolated; this never 500s. 
This is the read surface that replaces the + Telegram/Mira status pushes with an in-app, GCP-native view. + """ + from tools.ops_copilot import get_business_snapshot + + try: + return {"success": True, "snapshot": await get_business_snapshot()} + except Exception as e: + struct_logger.error("Ops snapshot failed", error=str(e)) + return {"success": False, "error": "Snapshot unavailable."} + + @app.post("/api/admin/copilot", dependencies=[Depends(require_admin)]) async def admin_ops_copilot(body: CopilotRequest): """Answer a staff question using live business data + platform how-to. diff --git a/tests/test_api_v1.py b/tests/test_api_v1.py index 42897b8..70b86f7 100644 --- a/tests/test_api_v1.py +++ b/tests/test_api_v1.py @@ -2459,3 +2459,24 @@ def test_create_inventory_strips_dealer_cost(monkeypatch): for forbidden in ("invoice_amount", "invoice_date", "cost"): assert forbidden not in stored assert stored["model_name"] == "The Nassau" + + +def test_ops_snapshot_requires_admin(monkeypatch): + client, _main, _db, _logger = create_client(monkeypatch, tho_api_key="tho-secret") + assert client.get("/api/admin/ops-snapshot").status_code == 401 + + +def test_ops_snapshot_returns_counts(monkeypatch): + client, main, _db, _logger = create_client(monkeypatch, tho_api_key="tho-secret") + from tools import ops_copilot + + async def _fake_snapshot(): + return {"leads": {"total": 3}, "operations": {"title": {"by_status": {"Title Issued": 5}}}} + + monkeypatch.setattr(ops_copilot, "get_business_snapshot", _fake_snapshot) + token = main._create_admin_token() + resp = client.get("/api/admin/ops-snapshot", headers={"X-Admin-Token": token}) + assert resp.status_code == 200 + body = resp.json() + assert body["success"] is True + assert body["snapshot"]["operations"]["title"]["by_status"] == {"Title Issued": 5}