From b6b9fe578f2ebc2b0d9b46fe5d780a65810d2c16 Mon Sep 17 00:00:00 2001
From: arigatoexpress <95630102+arigatoexpress@users.noreply.github.com>
Date: Tue, 16 Jun 2026 20:54:28 -0600
Subject: [PATCH 1/2] test(backend): harden silent failures + pin revenue-path
 behavior

Add logging to previously silent exception handlers (no control-flow change):
- caching.py: log swallowed Redis DEL errors in cache_delete
- seo_routes.py: log catch-and-ignore in homes/JSON-LD/shell/SPA render
- mira_routes.py: log silent swallows in installations/feedback helpers
  (_parse_timestamp, _count_collection_by_status, mira_firestore_collections)

New/converted tests (no application behavior change):
- test_crm.py: legacy import-time script -> 5 assertion tests
  (was appending a fake lead to data/leads.json on every collection)
- test_contact_lead_capture.py: contact form -> Lead creation + bad-phone reject
- test_form_extraction.py: SSN/DOB/income never reach the LLM payload
- test_social_publishers.py: fail-closed draft gating, readiness, UTM CTA builder
- test_lead_attribution.py: utm:/referrer: source bucketing

Full suite: 1018 passed, 11 skipped, 0 failed.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 caching.py                         |   4 +-
 mira_routes.py                     |  17 ++-
 seo_routes.py                      |  15 +-
 tests/test_contact_lead_capture.py |  48 ++++++
 tests/test_crm.py                  | 108 ++++++++------
 tests/test_form_extraction.py      | 229 +++++++++++++++++++++++++++++
 tests/test_lead_attribution.py     |  77 ++++++++++
 tests/test_social_publishers.py    | 207 ++++++++++++++++++++++++++
 8 files changed, 651 insertions(+), 54 deletions(-)
 create mode 100644 tests/test_form_extraction.py
 create mode 100644 tests/test_lead_attribution.py
 create mode 100644 tests/test_social_publishers.py

diff --git a/caching.py b/caching.py
index 7aaffbd..eece988 100644
--- a/caching.py
+++ b/caching.py
@@ -114,8 +114,8 @@ def cache_delete(key: str):
     if client:
         try:
             client.delete(key)
-        except Exception:
-            pass
+        except Exception as e:
+            logger.warning(f"Redis delete error for {key}: {e}")
 
     if key in _local_cache:
         del _local_cache[key]
diff --git a/mira_routes.py b/mira_routes.py
index 8dbc341..0d231f9 100644
--- a/mira_routes.py
+++ b/mira_routes.py
@@ -97,7 +97,10 @@ def _parse_timestamp(value: Any) -> datetime | None:
             if dt.tzinfo is None:
                 dt = dt.replace(tzinfo=UTC)
             return dt.astimezone(UTC)
-        except Exception:
+        except Exception as e:
+            struct_logger.warning(
+                "mira timestamp parse failed", value=value, error=str(e)
+            )
             return None
     return None
 
@@ -109,7 +112,12 @@ def _count_collection_by_status(collection_name: str, status_field: str = "statu
         docs = db.collection(collection_name).stream()
         statuses = [doc.to_dict().get(status_field, "UNKNOWN") for doc in docs]
         return dict(Counter(statuses))
-    except Exception:
+    except Exception as e:
+        struct_logger.warning(
+            "mira collection status count failed",
+            collection=collection_name,
+            error=str(e),
+        )
         return {"UNKNOWN": 0}
 
 
@@ -609,7 +617,10 @@ async def mira_firestore_collections(request: Request, limit: int = 1000) -> dic
             try:
                 count = len(list(col.limit(limit).stream()))
                 result.append({"collection": col.id, "count": count})
-            except Exception:
+            except Exception as e:
+                struct_logger.warning(
+                    "mira collection count failed", collection=col.id, error=str(e)
+                )
                 result.append({"collection": col.id, "count": None})
         return {
             "status": "healthy",
diff --git a/seo_routes.py b/seo_routes.py
index e8cd62e..fa76522 100644
--- a/seo_routes.py
+++ b/seo_routes.py
@@ -21,6 +21,7 @@
 
 import html
 import json
+import logging
 import os
 import re
 import threading
@@ -44,6 +45,8 @@
 
 router = APIRouter()
 
+logger = logging.getLogger(__name__)
+
 # ── Wiring (set by main.py at startup) ─────────────────────────────────────
 
 _get_homes = None  # callable -> list[dict]; the merged public inventory
@@ -197,8 +200,9 @@ def _safe_homes() -> list[dict]:
         return []
     try:
         return _get_homes() or []
-    except Exception:
+    except Exception as e:
         # SEO surface must never take the page down with it.
+        logger.warning(f"SEO _safe_homes: inventory fetch failed, serving empty: {e}")
         return []
 
 
@@ -353,7 +357,8 @@ def _product_jsonld(home: dict, canonical_url: str) -> dict | None:
     price = home.get("price_value")
     try:
         price = float(price) if price is not None else None
-    except (TypeError, ValueError):
+    except (TypeError, ValueError) as e:
+        logger.warning(f"SEO _product_jsonld: unparseable price_value {price!r}, omitting Product JSON-LD: {e}")
         price = None
     if not (price and price > 0):
         return None
@@ -416,7 +421,8 @@ def _shell() -> str:
             content = f.read()
         _shell_cache = (mtime, content)
         return content
-    except OSError:
+    except OSError as e:
+        logger.warning(f"SEO _shell: cannot read index shell {_index_html_path!r}, using minimal fallback: {e}")
         return '<!doctype html><html><head><title></title></head><body><div id="root"></div></body></html>'
 
 
@@ -1047,7 +1053,8 @@ def render_spa_response(full_path: str) -> Response | None:
     to its default file handling. Never raises."""
     try:
         return _render_spa_response(full_path)
-    except Exception:
+    except Exception as e:
+        logger.warning(f"SEO render_spa_response: rendering failed for {full_path!r}, falling through: {e}")
         return None
 
 
diff --git a/tests/test_contact_lead_capture.py b/tests/test_contact_lead_capture.py
index 25bb05e..9e60ce6 100644
--- a/tests/test_contact_lead_capture.py
+++ b/tests/test_contact_lead_capture.py
@@ -56,3 +56,51 @@ async def boom(_lead):
     assert body["success"] is True
     # ... but the dropped lead is now loud + alertable.
     assert "lead_storage_failed" in body.get("warnings", [])
+
+
+def test_contact_creates_lead_with_name_phone_and_source(monkeypatch):
+    """A valid name+phone POST persists a Lead carrying those fields + source."""
+    client, main, *_ = create_client(monkeypatch)
+    before = len(main.lead_manager.leads)
+
+    body = _post(
+        client, name="Carol", phone="(281) 324-3020", email="carol@example.com"
+    ).json()
+    assert body["success"] is True
+
+    # FakeLeadManager.create_lead appends the persisted Lead to .leads.
+    assert len(main.lead_manager.leads) == before + 1
+    created = main.lead_manager.leads[-1]
+    assert created.name == "Carol"
+    assert created.phone == "(281) 324-3020"
+    assert created.email == "carol@example.com"
+    assert created.source == "contact_form"  # default source for the contact form
+
+
+def test_contact_lead_carries_explicit_source(monkeypatch):
+    """A caller-supplied `source` flows through to the persisted Lead."""
+    client, main, *_ = create_client(monkeypatch)
+    before = len(main.lead_manager.leads)
+
+    body = _post(
+        client, name="Dave", phone="2813243020", source="facebook_ad"
+    ).json()
+    assert body["success"] is True
+
+    assert len(main.lead_manager.leads) == before + 1
+    created = main.lead_manager.leads[-1]
+    assert created.name == "Dave"
+    assert created.phone == "2813243020"
+    assert created.source == "facebook_ad"
+
+
+def test_contact_invalid_phone_rejected_and_creates_no_lead(monkeypatch):
+    """An invalid/short phone is rejected (success=false) and no Lead is stored."""
+    client, main, *_ = create_client(monkeypatch)
+    before = len(main.lead_manager.leads)
+
+    body = _post(client, name="Eve", phone="55512").json()
+    assert body["success"] is False
+    assert "error" in body
+    # Rejection happens before lead creation, so nothing is persisted.
+    assert len(main.lead_manager.leads) == before
diff --git a/tests/test_crm.py b/tests/test_crm.py
index 9f48573..39de6e4 100644
--- a/tests/test_crm.py
+++ b/tests/test_crm.py
@@ -1,48 +1,66 @@
-import json
-import os
-import sys
+"""Tests for tools.crm_tools.save_lead — lead capture validation + contract.
+
+Replaces a legacy smoke *script* that called ``save_lead`` at module top level,
+which (a) provided zero pytest coverage and (b) appended a fake lead to
+``data/leads.json`` every time the suite was merely collected. These tests pin
+the validation rules and success contract without writing to disk.
+"""
+
+import pytest
 
-# Add project root to path
-sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
 from tools import crm_tools
 
-# Test 1: Valid Lead
-print("Testing valid lead...")
-result = crm_tools.save_lead(
-    user_name="John Doe",
-    phone_number="555-123-4567",
-    interest_notes="Looking for a 3 bedroom double wide.",
-)
-print(f"Result: {result}")
-
-if result["success"]:
-    print("SUCCESS: Valid lead accepted.")
-else:
-    print("FAILURE: Valid lead rejected.")
-
-# Test 2: Invalid Phone
-print("\nTesting invalid phone...")
-result = crm_tools.save_lead(user_name="Invalid Phone", phone_number="123", interest_notes="test")
-print(f"Result: {result}")
-
-if not result["success"]:
-    print("SUCCESS: Invalid phone rejected.")
-else:
-    print("FAILURE: Invalid phone accepted.")
-
-# Test 3: Check local file (if writable)
-try:
-    data_dir = os.path.join(os.path.dirname(__file__), "..", "data")
-    leads_file = os.path.join(data_dir, "leads.json")
-    if os.path.exists(leads_file):
-        with open(leads_file) as f:
-            leads = json.load(f)
-            last_lead = leads[-1]
-            if last_lead["name"] == "John Doe":
-                print("\nSUCCESS: Lead found in local JSON file.")
-            else:
-                print(f"\nFAILURE: Last lead was {last_lead['name']}, expected John Doe.")
-    else:
-        print("\nNOTE: leads.json not found (expected if data dir not writable/created).")
-except Exception as e:
-    print(f"\nError checking file: {e}")
+
+@pytest.fixture(autouse=True)
+def _no_disk_writes(monkeypatch):
+    """Keep save_lead's success path from appending to the repo's data/leads.json.
+
+    save_lead guards its file write behind ``os.access(..., os.W_OK)``; forcing
+    that False exercises the full structure-and-return logic while skipping the
+    side effect, so collecting/running tests never pollutes local lead data.
+    """
+    monkeypatch.setattr(crm_tools.os, "access", lambda *a, **k: False)
+
+
+def test_save_lead_valid_returns_success():
+    result = crm_tools.save_lead(
+        user_name="John Doe",
+        phone_number="555-123-4567",
+        interest_notes="Looking for a 3 bedroom double wide.",
+    )
+    assert result["success"] is True
+    # Confirmation echoes the customer's name and number back to the agent.
+    assert "John Doe" in result["message"]
+    assert "555-123-4567" in result["message"]
+
+
+def test_save_lead_accepts_formatted_phone():
+    result = crm_tools.save_lead(
+        user_name="Jane Smith",
+        phone_number="(281) 555-0100",
+        interest_notes="Financing question",
+    )
+    assert result["success"] is True
+
+
+def test_save_lead_rejects_missing_name():
+    result = crm_tools.save_lead(
+        user_name="", phone_number="555-123-4567", interest_notes="test"
+    )
+    assert result["success"] is False
+    assert "name" in result["message"].lower()
+
+
+def test_save_lead_rejects_missing_phone():
+    result = crm_tools.save_lead(
+        user_name="No Phone", phone_number="", interest_notes="test"
+    )
+    assert result["success"] is False
+
+
+def test_save_lead_rejects_short_phone():
+    result = crm_tools.save_lead(
+        user_name="Short Phone", phone_number="123", interest_notes="test"
+    )
+    assert result["success"] is False
+    assert "invalid" in result["message"].lower() or "10" in result["message"]
diff --git a/tests/test_form_extraction.py b/tests/test_form_extraction.py
new file mode 100644
index 0000000..5f9b672
--- /dev/null
+++ b/tests/test_form_extraction.py
@@ -0,0 +1,229 @@
+"""Tests for tools/form_extraction.py — PII filtering before LLM calls.
+
+These tests prove the guardrail in CLAUDE.md: "Never send PII to LLM — strip PII
+fields before Gemini API calls." We confirm that:
+  * SSN / DOB / income field definitions are NOT included in the extraction prompt
+    that gets sent to Gemini (PII-redaction path).
+  * Normal (non-PII) fields still flow through to the prompt and the result
+    (happy path).
+  * Even if the LLM hallucinates a PII field back, it is dropped from the result.
+
+The Gemini/genai call is fully mocked — no network calls are made.
+
+Run: python -m pytest tests/test_form_extraction.py -q
+"""
+
+import asyncio
+import sys
+from pathlib import Path
+from unittest.mock import patch
+
+import google.genai  # noqa: F401  (ensure the real submodule is importable before patching)
+
+sys.path.insert(0, str(Path(__file__).parent.parent))
+
+from config.field_map_loader import get_fields_for_template
+from tools import form_extraction
+
+# Template that mixes PII fields (SSN, Date of Birth) with normal fields.
+TEMPLATE = "creditapp.pdf"
+
+# A conversation transcript that contains raw PII values. The extraction layer
+# should never surface these as *fields* to the model, and the field metadata it
+# does send must not reference SSN/DOB/income.
+CONVERSATION = (
+    "user: Hi, I'm John Doe and my employer is Acme Corp.\n"
+    "user: My SSN is 123-45-6789 and I was born on 01/02/1980.\n"
+    "user: My monthly income is $5000 and my phone is 555-123-4567."
+)
+
+# PII data-field names (from config/field_map.json) that must never be offered
+# to the LLM as extractable fields.
+PII_FIELD_NAMES = {"buyer_ssn", "buyer_dob", "buyer_income", "co_buyer_ssn", "co_buyer_dob"}
+
+# PII labels that appear in the field definitions — they must not leak into the
+# prompt either (the prompt lists fields by label).
+PII_LABELS = {"SSN", "Date of Birth", "Monthly Income"}
+
+
+class _FakeResponse:
+    """Mimics the genai generate_content response (exposes a .text attribute)."""
+
+    def __init__(self, text: str):
+        self.text = text
+
+
+class _FakeModels:
+    def __init__(self, captured: dict, response_json: str):
+        self._captured = captured
+        self._response_json = response_json
+
+    def generate_content(self, model=None, contents=None):
+        # Capture exactly what would be sent to Gemini so the test can inspect it.
+        self._captured["model"] = model
+        self._captured["contents"] = contents
+        return _FakeResponse(self._response_json)
+
+
+class _FakeClient:
+    def __init__(self, captured: dict, response_json: str):
+        self.models = _FakeModels(captured, response_json)
+
+
+def _install_fake_genai(captured: dict, response_json: str):
+    """Patch `google.genai.Client` so no network call is made.
+
+    `extract_form_data_from_session` does `from google import genai` at call
+    time and then calls `genai.Client()`. Patching the `Client` symbol on the
+    real submodule makes the fake visible to that import (a sys.modules patch
+    alone does not work, because `from google import genai` resolves `genai` as
+    an attribute of the already-imported `google` package object).
+    """
+
+    def _client_factory(*args, **kwargs):
+        return _FakeClient(captured, response_json)
+
+    return patch("google.genai.Client", _client_factory)
+
+
+def _run(coro):
+    return asyncio.run(coro)
+
+
+def test_pii_field_definitions_are_stripped_from_llm_prompt():
+    """PII-redaction path: the prompt sent to Gemini must not reference any
+    SSN / DOB / income field name or label."""
+    captured: dict = {}
+    # LLM returns only safe, validated fields.
+    response_json = '{"buyer_name": "John Doe", "employer_name": "Acme Corp"}'
+
+    with _install_fake_genai(captured, response_json), patch.object(
+        form_extraction,
+        "_get_conversation_text",
+        return_value=CONVERSATION,
+    ) as _mock_convo:
+        # Make the patched _get_conversation_text awaitable.
+        async def _fake_convo(session_id, runner=None):
+            return CONVERSATION
+
+        _mock_convo.side_effect = _fake_convo
+
+        result = _run(
+            form_extraction.extract_form_data_from_session(
+                session_id="sess-1",
+                template_name=TEMPLATE,
+                runner=None,
+            )
+        )
+
+    prompt = captured.get("contents")
+    assert prompt is not None, "Gemini was never called / prompt not captured"
+
+    # No PII field *name* should appear in the prompt's field list.
+    for pii_field in PII_FIELD_NAMES:
+        assert pii_field not in prompt, f"PII field name leaked into LLM prompt: {pii_field}"
+
+    # No PII *label* should appear in the listed extractable fields. We check only
+    # the field-list region of the prompt (between "Fields to look for:" and
+    # "Conversation:") so the standing safety instruction ("Do NOT extract any
+    # SSN, date of birth, income...") doesn't trip the assertion.
+    field_list_region = prompt.split("Conversation:")[0]
+    fields_to_look_for = field_list_region.split("Fields to look for:")[-1]
+    for label in PII_LABELS:
+        assert (
+            label not in fields_to_look_for
+        ), f"PII label leaked into the extractable-field list: {label}"
+
+    # The result itself must not contain any PII field.
+    extracted = result["extracted_data"]
+    for pii_field in PII_FIELD_NAMES:
+        assert pii_field not in extracted, f"PII field present in extracted result: {pii_field}"
+
+
+def test_non_pii_fields_pass_through_to_prompt_and_result():
+    """Happy path: normal fields are offered to the model and returned."""
+    captured: dict = {}
+    response_json = '{"buyer_name": "John Doe", "employer_name": "Acme Corp"}'
+
+    with _install_fake_genai(captured, response_json), patch.object(
+        form_extraction, "_get_conversation_text"
+    ) as mock_convo:
+
+        async def _fake_convo(session_id, runner=None):
+            return CONVERSATION
+
+        mock_convo.side_effect = _fake_convo
+
+        result = _run(
+            form_extraction.extract_form_data_from_session(
+                session_id="sess-2",
+                template_name=TEMPLATE,
+                runner=None,
+            )
+        )
+
+    prompt = captured["contents"]
+    # Representative non-PII fields are present in the prompt.
+    assert "buyer_name" in prompt
+    assert "employer_name" in prompt
+
+    # And the extracted result carries them through.
+    extracted = result["extracted_data"]
+    assert extracted.get("buyer_name") == "John Doe"
+    assert extracted.get("employer_name") == "Acme Corp"
+
+
+def test_llm_returned_pii_keys_are_dropped_from_result():
+    """Defense in depth: even if the model echoes back a PII key, the validation
+    step drops it because it is not in the safe-field allowlist."""
+    captured: dict = {}
+    # Adversarial: model tries to return an SSN/DOB value.
+    response_json = (
+        '{"buyer_name": "John Doe", "buyer_ssn": "123-45-6789", '
+        '"buyer_dob": "01/02/1980"}'
+    )
+
+    with _install_fake_genai(captured, response_json), patch.object(
+        form_extraction, "_get_conversation_text"
+    ) as mock_convo:
+
+        async def _fake_convo(session_id, runner=None):
+            return CONVERSATION
+
+        mock_convo.side_effect = _fake_convo
+
+        result = _run(
+            form_extraction.extract_form_data_from_session(
+                session_id="sess-3",
+                template_name=TEMPLATE,
+                runner=None,
+            )
+        )
+
+    extracted = result["extracted_data"]
+    assert "buyer_ssn" not in extracted
+    assert "buyer_dob" not in extracted
+    assert extracted.get("buyer_name") == "John Doe"
+
+
+def test_template_safe_fields_exclude_all_pii_definitions():
+    """Sanity check on the field registry itself: every field flagged pii=True
+    for this template is excluded from the safe (LLM-bound) field set, mirroring
+    the filter inside extract_form_data_from_session."""
+    template_fields = get_fields_for_template(TEMPLATE)
+    pii_in_template = {
+        name for name, defn in template_fields.items() if defn.get("pii", False)
+    }
+    # Template must actually contain PII fields, else the test proves nothing.
+    assert pii_in_template, "Expected creditapp.pdf to define PII fields"
+
+    safe_fields = {
+        name: defn
+        for name, defn in template_fields.items()
+        if not defn.get("pii", False)
+    }
+    # No PII field name survives into the safe set.
+    assert pii_in_template.isdisjoint(safe_fields.keys())
+    # Specifically SSN and DOB are gone.
+    assert "buyer_ssn" not in safe_fields
+    assert "buyer_dob" not in safe_fields
diff --git a/tests/test_lead_attribution.py b/tests/test_lead_attribution.py
new file mode 100644
index 0000000..34561b1
--- /dev/null
+++ b/tests/test_lead_attribution.py
@@ -0,0 +1,77 @@
+"""Lead-source attribution categorization tests.
+
+``main._categorize_lead_source`` maps a Lead-like object onto the coarse
+buckets the CRM attribution chart consumes. Its precedence is:
+
+    utm_source  >  referrer  >  raw source bucket
+
+The real ``Lead`` dataclass has no ``utm_source`` / ``referrer`` fields yet —
+the function reads them defensively with ``getattr(..., None)`` so it stays
+crash-free if they ever land. We exercise those branches with a lightweight
+``SimpleNamespace`` fake (no Firestore), and cover the raw-source fallbacks
+with the actual ``Lead`` dataclass.
+"""
+
+from types import SimpleNamespace
+
+from lead_management import Lead
+from main import _categorize_lead_source
+
+
+def _fake_lead(source=None, utm_source=None, referrer=None):
+    """A minimal Lead-like object — no Firestore, no dataclass overhead."""
+    return SimpleNamespace(source=source, utm_source=utm_source, referrer=referrer)
+
+
+def test_utm_source_takes_priority_and_is_lowercased():
+    # utm_source="Instagram" -> "utm:instagram"; utm wins even when a raw
+    # source and referrer are also present.
+    lead = _fake_lead(source="chat", utm_source="Instagram", referrer="https://t.co/x")
+    assert _categorize_lead_source(lead) == "utm:instagram"
+
+
+def test_referrer_falls_back_to_host():
+    # No utm -> referrer host (protocol + path stripped).
+    lead = _fake_lead(referrer="https://www.google.com/search?q=mobile+homes")
+    assert _categorize_lead_source(lead) == "referrer:www.google.com"
+
+
+def test_referrer_host_is_truncated_to_40_chars():
+    host = "a" * 60
+    lead = _fake_lead(referrer=f"http://{host}.com/path")
+    result = _categorize_lead_source(lead)
+    assert result.startswith("referrer:")
+    assert result == "referrer:" + ("a" * 40)
+
+
+def test_referrer_with_no_host_becomes_direct():
+    lead = _fake_lead(referrer="https://")
+    assert _categorize_lead_source(lead) == "referrer:direct"
+
+
+def test_known_raw_source_bucket():
+    # No utm / referrer -> the raw source bucket. "chat" passes through.
+    lead = _fake_lead(source="chat")
+    assert _categorize_lead_source(lead) == "chat"
+
+
+def test_chat_intake_normalized_to_chat():
+    lead = _fake_lead(source="chat_intake")
+    assert _categorize_lead_source(lead) == "chat"
+
+
+def test_empty_source_is_other():
+    lead = _fake_lead(source="")
+    assert _categorize_lead_source(lead) == "other"
+
+
+def test_unknown_raw_source_passes_through_lowercased():
+    lead = _fake_lead(source="Facebook_Ad")
+    assert _categorize_lead_source(lead) == "facebook_ad"
+
+
+def test_real_lead_dataclass_uses_source_bucket():
+    # A genuine Lead has no utm_source/referrer attrs; getattr defaults keep
+    # the function on the raw-source branch without raising AttributeError.
+    lead = Lead(lead_id="L1", user_id="U1", session_id="S1", source="instagram")
+    assert _categorize_lead_source(lead) == "instagram"
diff --git a/tests/test_social_publishers.py b/tests/test_social_publishers.py
new file mode 100644
index 0000000..67cc012
--- /dev/null
+++ b/tests/test_social_publishers.py
@@ -0,0 +1,207 @@
+"""Tests for the fail-closed social publishing adapters.
+
+These assert the safety contract of ``tools/social_publishers.py``:
+nothing is published unless the explicit ``THO_SOCIAL_PUBLISH_ENABLED`` gate is
+on and the required tokens are configured, readiness reporting is accurate, and
+the opt-in UTM CTA link builder stays a strict no-op by default.
+
+All environment access is monkeypatched and the module-level ``requests`` is
+replaced with a guard that fails loudly if any test would make a real HTTP call.
+"""
+
+from __future__ import annotations
+
+import pytest
+
+from tools import social_publishers
+
+# Every env var the module reads. We clear all of them per test so the suite is
+# hermetic regardless of the developer's shell or CI secrets.
+_SOCIAL_ENV_VARS = (
+    "THO_SOCIAL_PUBLISH_ENABLED",
+    "THO_UTM_CTA_ENABLED",
+    "THO_UTM_SOURCE",
+    "THO_UTM_MEDIUM",
+    "PUBLIC_SITE_URL",
+    "TIKTOK_ACCESS_TOKEN",
+    "TIKTOK_PRIVACY_LEVEL",
+    "META_ACCESS_TOKEN",
+    "META_GRAPH_VERSION",
+    "INSTAGRAM_BUSINESS_ACCOUNT_ID",
+)
+
+
+class _NoHTTP:
+    """Stand-in for ``requests`` that fails if any HTTP method is invoked."""
+
+    def __getattr__(self, name: str):
+        def _boom(*args, **kwargs):  # pragma: no cover - only fires on misuse
+            raise AssertionError(
+                f"Unexpected real HTTP call: requests.{name}({args!r}, {kwargs!r})"
+            )
+
+        return _boom
+
+
+@pytest.fixture(autouse=True)
+def _isolate_env_and_block_http(monkeypatch):
+    """Clear all social env vars and forbid real HTTP for every test."""
+    for var in _SOCIAL_ENV_VARS:
+        monkeypatch.delenv(var, raising=False)
+    # The module imports ``requests`` at module scope, so patch it there.
+    monkeypatch.setattr(social_publishers, "requests", _NoHTTP())
+    # NOTE: _canonical_origin() can fall back to config_loader.get_business();
+    # tests needing "no origin" must stub it themselves (PUBLIC_SITE_URL is per test).
+    return monkeypatch
+
+
+# ---------------------------------------------------------------------------
+# (1) prepare_or_publish_social_post returns a draft (no publish) when the
+#     THO_SOCIAL_PUBLISH_ENABLED gate is unset.
+# ---------------------------------------------------------------------------
+
+
+def test_prepare_returns_draft_when_publish_gate_unset(monkeypatch):
+    # Fully configure tiktok tokens + site URL so the ONLY thing missing is the
+    # publish gate. This isolates the gate as the reason it stays a draft.
+    monkeypatch.setenv("TIKTOK_ACCESS_TOKEN", "tok-abc")
+    monkeypatch.setenv("PUBLIC_SITE_URL", "https://example.com")
+    # THO_SOCIAL_PUBLISH_ENABLED intentionally left unset.
+
+    result = social_publishers.prepare_or_publish_social_post(
+        platform="tiktok",
+        content_type="video",
+        scheduled_time="2026-07-01T12:00:00",
+        caption="New listing tour",
+        hashtags=["#texashomes"],
+        video_url="https://cdn.example.com/clip.mp4",
+    )
+
+    assert result["success"] is True
+    assert result["status"] == "draft_ready"
+    assert result["live_integration"] is False
+    assert result["publish_attempted"] is False
+    assert result["post_id"].startswith("DRAFT-")
+    # The blocking reason must point at the disabled publish gate.
+    assert "THO_SOCIAL_PUBLISH_ENABLED" in result["publish_blocked_reason"]
+    # readiness embedded in the draft confirms publish is not enabled.
+    assert result["social_readiness"]["publish_enabled"] is False
+
+
+def test_prepare_draft_does_not_call_requests(monkeypatch):
+    # Even with tokens present, an unset gate must not reach the network. The
+    # autouse _NoHTTP guard would raise AssertionError if it did.
+    monkeypatch.setenv("META_ACCESS_TOKEN", "meta-tok")
+    monkeypatch.setenv("INSTAGRAM_BUSINESS_ACCOUNT_ID", "ig-123")
+    monkeypatch.setenv("PUBLIC_SITE_URL", "https://example.com")
+
+    result = social_publishers.prepare_or_publish_social_post(
+        platform="instagram_reels",
+        content_type="video",
+        scheduled_time="2026-07-01T12:00:00",
+        caption="Reel",
+        video_url="https://cdn.example.com/reel.mp4",
+    )
+
+    assert result["status"] == "draft_ready"
+    assert result["publish_attempted"] is False
+
+
+# ---------------------------------------------------------------------------
+# (2) social_readiness() reports instagram_reels configured=false with the
+#     correct required_env when tokens are absent, true when present.
+# ---------------------------------------------------------------------------
+
+
+def test_social_readiness_instagram_unconfigured_lists_required_env(monkeypatch):
+    # No META/IG/site tokens set (cleared by autouse fixture).
+    readiness = social_publishers.social_readiness()
+    ig = readiness["platforms"]["instagram_reels"]
+
+    assert ig["configured"] is False
+    # All three inputs are missing, so all three must be reported.
+    assert set(ig["required_env"]) == {
+        "META_ACCESS_TOKEN",
+        "INSTAGRAM_BUSINESS_ACCOUNT_ID",
+        "PUBLIC_SITE_URL",
+    }
+    assert ig["api"] == "Meta Instagram Content Publishing API"
+    assert readiness["publish_enabled"] is False
+
+
+def test_social_readiness_instagram_partial_lists_only_missing(monkeypatch):
+    # Token present, account id + site URL still missing.
+    monkeypatch.setenv("META_ACCESS_TOKEN", "meta-tok")
+
+    ig = social_publishers.social_readiness()["platforms"]["instagram_reels"]
+
+    assert ig["configured"] is False
+    assert set(ig["required_env"]) == {
+        "INSTAGRAM_BUSINESS_ACCOUNT_ID",
+        "PUBLIC_SITE_URL",
+    }
+
+
+def test_social_readiness_instagram_configured_when_all_present(monkeypatch):
+    monkeypatch.setenv("META_ACCESS_TOKEN", "meta-tok")
+    monkeypatch.setenv("INSTAGRAM_BUSINESS_ACCOUNT_ID", "ig-123")
+    monkeypatch.setenv("PUBLIC_SITE_URL", "https://example.com")
+
+    ig = social_publishers.social_readiness()["platforms"]["instagram_reels"]
+
+    assert ig["configured"] is True
+    assert ig["required_env"] == []
+
+
+# ---------------------------------------------------------------------------
+# (3) The UTM CTA link builder returns None when THO_UTM_CTA_ENABLED is unset,
+#     and a correctly-tagged URL when enabled + a canonical origin is present.
+# ---------------------------------------------------------------------------
+
+
+def test_utm_cta_link_none_when_gate_unset(monkeypatch):
+    # Origin present but the opt-in gate is off -> strict no-op.
+    monkeypatch.setenv("PUBLIC_SITE_URL", "https://example.com")
+
+    assert social_publishers._utm_cta_link("tiktok", "Spring Sale") is None
+
+
+def test_utm_cta_link_none_when_enabled_but_no_origin(monkeypatch):
+    # Gate on but no resolvable origin -> still no-op. Block the config_loader
+    # fallback so the absence of PUBLIC_SITE_URL truly means "no origin".
+    monkeypatch.setenv("THO_UTM_CTA_ENABLED", "true")
+    monkeypatch.setattr(
+        social_publishers, "_canonical_origin", lambda: None
+    )
+
+    assert social_publishers._utm_cta_link("tiktok", "Spring Sale") is None
+
+
+def test_utm_cta_link_tagged_url_when_enabled_with_origin(monkeypatch):
+    monkeypatch.setenv("THO_UTM_CTA_ENABLED", "1")
+    monkeypatch.setenv("PUBLIC_SITE_URL", "https://example.com/")
+
+    link = social_publishers._utm_cta_link("tiktok", "Spring Sale 2026")
+
+    assert link is not None
+    # Trailing slash on origin is stripped before composing the URL.
+    assert link.startswith("https://example.com/?")
+    assert "utm_source=tiktok" in link
+    assert "utm_medium=social" in link
+    # Campaign is slugified: lowercased, non-alnum runs -> single hyphen.
+    assert "utm_campaign=spring-sale-2026" in link
+
+
+def test_utm_cta_link_respects_source_and_medium_overrides(monkeypatch):
+    monkeypatch.setenv("THO_UTM_CTA_ENABLED", "yes")
+    monkeypatch.setenv("PUBLIC_SITE_URL", "https://example.com")
+    monkeypatch.setenv("THO_UTM_SOURCE", "ig")
+    monkeypatch.setenv("THO_UTM_MEDIUM", "paid_social")
+
+    link = social_publishers._utm_cta_link("instagram_reels", None)
+
+    assert link is not None
+    assert "utm_source=ig" in link
+    assert "utm_medium=paid_social" in link
+    # Empty campaign falls back to the "ad-studio" default token.
+    assert "utm_campaign=ad-studio" in link

From 4a2611c85e5abb8d3135e5a0f345d04f73903f06 Mon Sep 17 00:00:00 2001
From: arigatoexpress <95630102+arigatoexpress@users.noreply.github.com>
Date: Tue, 16 Jun 2026 21:05:55 -0600
Subject: [PATCH 2/2] test(attribution): import main via Firestore-stubbed
 fixture (CI fix)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

test_lead_attribution.py imported `main` at module top level. Importing `main`
instantiates a Firestore client, so test collection failed in CI's
credential-less "no Firestore/GCS" job (it passed locally only because of
developer Application Default Credentials). Route the import through
create_client(monkeypatch), like the rest of the suite, so `main` loads with
Firestore stubbed. Verified: 9 passed with GCP credentials unset.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 tests/test_lead_attribution.py | 61 +++++++++++++++++++++++-----------
 1 file changed, 42 insertions(+), 19 deletions(-)

diff --git a/tests/test_lead_attribution.py b/tests/test_lead_attribution.py
index 34561b1..a0cb3a9 100644
--- a/tests/test_lead_attribution.py
+++ b/tests/test_lead_attribution.py
@@ -10,12 +10,35 @@
 crash-free if they ever land. We exercise those branches with a lightweight
 ``SimpleNamespace`` fake (no Firestore), and cover the raw-source fallbacks
 with the actual ``Lead`` dataclass.
+
+``main`` instantiates a Firestore client at import (``lead_manager =
+LeadManager(...)``), which needs GCP credentials the CI "no Firestore/GCS"
+job lacks. So ``_categorize_lead_source`` is pulled in via the ``categorize``
+fixture, which first calls ``create_client`` to stub those eager imports the
+same way the rest of the suite does — importing ``main`` raw at module top
+errors during collection in a creds-less environment.
 """
 
+import sys
+from pathlib import Path
 from types import SimpleNamespace
 
+import pytest
+
 from lead_management import Lead
-from main import _categorize_lead_source
+
+sys.path.insert(0, str(Path(__file__).parent))
+
+
+@pytest.fixture
+def categorize(monkeypatch):
+    """Return ``main._categorize_lead_source`` with Firestore stubbed out."""
+    from test_api_v1 import create_client
+
+    create_client(monkeypatch)
+    from main import _categorize_lead_source
+
+    return _categorize_lead_source
 
 
 def _fake_lead(source=None, utm_source=None, referrer=None):
@@ -23,55 +46,55 @@ def _fake_lead(source=None, utm_source=None, referrer=None):
     return SimpleNamespace(source=source, utm_source=utm_source, referrer=referrer)
 
 
-def test_utm_source_takes_priority_and_is_lowercased():
+def test_utm_source_takes_priority_and_is_lowercased(categorize):
     # utm_source="Instagram" -> "utm:instagram"; utm wins even when a raw
     # source and referrer are also present.
     lead = _fake_lead(source="chat", utm_source="Instagram", referrer="https://t.co/x")
-    assert _categorize_lead_source(lead) == "utm:instagram"
+    assert categorize(lead) == "utm:instagram"
 
 
-def test_referrer_falls_back_to_host():
+def test_referrer_falls_back_to_host(categorize):
     # No utm -> referrer host (protocol + path stripped).
     lead = _fake_lead(referrer="https://www.google.com/search?q=mobile+homes")
-    assert _categorize_lead_source(lead) == "referrer:www.google.com"
+    assert categorize(lead) == "referrer:www.google.com"
 
 
-def test_referrer_host_is_truncated_to_40_chars():
+def test_referrer_host_is_truncated_to_40_chars(categorize):
     host = "a" * 60
     lead = _fake_lead(referrer=f"http://{host}.com/path")
-    result = _categorize_lead_source(lead)
+    result = categorize(lead)
     assert result.startswith("referrer:")
     assert result == "referrer:" + ("a" * 40)
 
 
-def test_referrer_with_no_host_becomes_direct():
+def test_referrer_with_no_host_becomes_direct(categorize):
     lead = _fake_lead(referrer="https://")
-    assert _categorize_lead_source(lead) == "referrer:direct"
+    assert categorize(lead) == "referrer:direct"
 
 
-def test_known_raw_source_bucket():
+def test_known_raw_source_bucket(categorize):
     # No utm / referrer -> the raw source bucket. "chat" passes through.
     lead = _fake_lead(source="chat")
-    assert _categorize_lead_source(lead) == "chat"
+    assert categorize(lead) == "chat"
 
 
-def test_chat_intake_normalized_to_chat():
+def test_chat_intake_normalized_to_chat(categorize):
     lead = _fake_lead(source="chat_intake")
-    assert _categorize_lead_source(lead) == "chat"
+    assert categorize(lead) == "chat"
 
 
-def test_empty_source_is_other():
+def test_empty_source_is_other(categorize):
     lead = _fake_lead(source="")
-    assert _categorize_lead_source(lead) == "other"
+    assert categorize(lead) == "other"
 
 
-def test_unknown_raw_source_passes_through_lowercased():
+def test_unknown_raw_source_passes_through_lowercased(categorize):
     lead = _fake_lead(source="Facebook_Ad")
-    assert _categorize_lead_source(lead) == "facebook_ad"
+    assert categorize(lead) == "facebook_ad"
 
 
-def test_real_lead_dataclass_uses_source_bucket():
+def test_real_lead_dataclass_uses_source_bucket(categorize):
     # A genuine Lead has no utm_source/referrer attrs; getattr defaults keep
     # the function on the raw-source branch without raising AttributeError.
     lead = Lead(lead_id="L1", user_id="U1", session_id="S1", source="instagram")
-    assert _categorize_lead_source(lead) == "instagram"
+    assert categorize(lead) == "instagram"