From b6b9fe578f2ebc2b0d9b46fe5d780a65810d2c16 Mon Sep 17 00:00:00 2001 From: arigatoexpress <95630102+arigatoexpress@users.noreply.github.com> Date: Tue, 16 Jun 2026 20:54:28 -0600 Subject: [PATCH 1/2] test(backend): harden silent failures + pin revenue-path behavior Observability for previously-silent error swallows (no control-flow change): - caching.py: log the Redis DEL swallow - seo_routes.py: log catch-and-ignore in homes/JSON-LD/shell/SPA render - mira_routes.py: log silent swallows in installations/feedback helpers New/converted tests (no application behavior change): - test_crm.py: legacy import-time script -> 5 assertion tests (was appending a fake lead to data/leads.json on every collection) - test_contact_lead_capture.py: contact form -> Lead creation + bad-phone reject - test_form_extraction.py: SSN/DOB/income never reach the LLM payload - test_social_publishers.py: fail-closed draft gating, readiness, UTM CTA builder - test_lead_attribution.py: utm:/referrer: source bucketing Full suite: 1018 passed, 11 skipped, 0 failed. 
Co-Authored-By: Claude Opus 4.8 --- caching.py | 4 +- mira_routes.py | 17 ++- seo_routes.py | 15 +- tests/test_contact_lead_capture.py | 48 ++++++ tests/test_crm.py | 108 ++++++++------ tests/test_form_extraction.py | 229 +++++++++++++++++++++++++++++ tests/test_lead_attribution.py | 77 ++++++++++ tests/test_social_publishers.py | 207 ++++++++++++++++++++++++++ 8 files changed, 651 insertions(+), 54 deletions(-) create mode 100644 tests/test_form_extraction.py create mode 100644 tests/test_lead_attribution.py create mode 100644 tests/test_social_publishers.py diff --git a/caching.py b/caching.py index 7aaffbd..eece988 100644 --- a/caching.py +++ b/caching.py @@ -114,8 +114,8 @@ def cache_delete(key: str): if client: try: client.delete(key) - except Exception: - pass + except Exception as e: + logger.warning(f"Redis delete error for {key}: {e}") if key in _local_cache: del _local_cache[key] diff --git a/mira_routes.py b/mira_routes.py index 8dbc341..0d231f9 100644 --- a/mira_routes.py +++ b/mira_routes.py @@ -97,7 +97,10 @@ def _parse_timestamp(value: Any) -> datetime | None: if dt.tzinfo is None: dt = dt.replace(tzinfo=UTC) return dt.astimezone(UTC) - except Exception: + except Exception as e: + struct_logger.warning( + "mira timestamp parse failed", value=value, error=str(e) + ) return None return None @@ -109,7 +112,12 @@ def _count_collection_by_status(collection_name: str, status_field: str = "statu docs = db.collection(collection_name).stream() statuses = [doc.to_dict().get(status_field, "UNKNOWN") for doc in docs] return dict(Counter(statuses)) - except Exception: + except Exception as e: + struct_logger.warning( + "mira collection status count failed", + collection=collection_name, + error=str(e), + ) return {"UNKNOWN": 0} @@ -609,7 +617,10 @@ async def mira_firestore_collections(request: Request, limit: int = 1000) -> dic try: count = len(list(col.limit(limit).stream())) result.append({"collection": col.id, "count": count}) - except Exception: + except 
Exception as e: + struct_logger.warning( + "mira collection count failed", collection=col.id, error=str(e) + ) result.append({"collection": col.id, "count": None}) return { "status": "healthy", diff --git a/seo_routes.py b/seo_routes.py index e8cd62e..fa76522 100644 --- a/seo_routes.py +++ b/seo_routes.py @@ -21,6 +21,7 @@ import html import json +import logging import os import re import threading @@ -44,6 +45,8 @@ router = APIRouter() +logger = logging.getLogger(__name__) + # ── Wiring (set by main.py at startup) ───────────────────────────────────── _get_homes = None # callable -> list[dict]; the merged public inventory @@ -197,8 +200,9 @@ def _safe_homes() -> list[dict]: return [] try: return _get_homes() or [] - except Exception: + except Exception as e: # SEO surface must never take the page down with it. + logger.warning(f"SEO _safe_homes: inventory fetch failed, serving empty: {e}") return [] @@ -353,7 +357,8 @@ def _product_jsonld(home: dict, canonical_url: str) -> dict | None: price = home.get("price_value") try: price = float(price) if price is not None else None - except (TypeError, ValueError): + except (TypeError, ValueError) as e: + logger.warning(f"SEO _product_jsonld: unparseable price_value {price!r}, omitting Product JSON-LD: {e}") price = None if not (price and price > 0): return None @@ -416,7 +421,8 @@ def _shell() -> str: content = f.read() _shell_cache = (mtime, content) return content - except OSError: + except OSError as e: + logger.warning(f"SEO _shell: cannot read index shell {_index_html_path!r}, using minimal fallback: {e}") return '
' @@ -1047,7 +1053,8 @@ def render_spa_response(full_path: str) -> Response | None: to its default file handling. Never raises.""" try: return _render_spa_response(full_path) - except Exception: + except Exception as e: + logger.warning(f"SEO render_spa_response: rendering failed for {full_path!r}, falling through: {e}") return None diff --git a/tests/test_contact_lead_capture.py b/tests/test_contact_lead_capture.py index 25bb05e..9e60ce6 100644 --- a/tests/test_contact_lead_capture.py +++ b/tests/test_contact_lead_capture.py @@ -56,3 +56,51 @@ async def boom(_lead): assert body["success"] is True # ... but the dropped lead is now loud + alertable. assert "lead_storage_failed" in body.get("warnings", []) + + +def test_contact_creates_lead_with_name_phone_and_source(monkeypatch): + """A valid name+phone POST persists a Lead carrying those fields + source.""" + client, main, *_ = create_client(monkeypatch) + before = len(main.lead_manager.leads) + + body = _post( + client, name="Carol", phone="(281) 324-3020", email="carol@example.com" + ).json() + assert body["success"] is True + + # FakeLeadManager.create_lead appends the persisted Lead to .leads. 
+ assert len(main.lead_manager.leads) == before + 1 + created = main.lead_manager.leads[-1] + assert created.name == "Carol" + assert created.phone == "(281) 324-3020" + assert created.email == "carol@example.com" + assert created.source == "contact_form" # default source for the contact form + + +def test_contact_lead_carries_explicit_source(monkeypatch): + """A caller-supplied `source` flows through to the persisted Lead.""" + client, main, *_ = create_client(monkeypatch) + before = len(main.lead_manager.leads) + + body = _post( + client, name="Dave", phone="2813243020", source="facebook_ad" + ).json() + assert body["success"] is True + + assert len(main.lead_manager.leads) == before + 1 + created = main.lead_manager.leads[-1] + assert created.name == "Dave" + assert created.phone == "2813243020" + assert created.source == "facebook_ad" + + +def test_contact_invalid_phone_rejected_and_creates_no_lead(monkeypatch): + """An invalid/short phone is rejected (success=false) and no Lead is stored.""" + client, main, *_ = create_client(monkeypatch) + before = len(main.lead_manager.leads) + + body = _post(client, name="Eve", phone="55512").json() + assert body["success"] is False + assert "error" in body + # Rejection happens before lead creation, so nothing is persisted. + assert len(main.lead_manager.leads) == before diff --git a/tests/test_crm.py b/tests/test_crm.py index 9f48573..39de6e4 100644 --- a/tests/test_crm.py +++ b/tests/test_crm.py @@ -1,48 +1,66 @@ -import json -import os -import sys +"""Tests for tools.crm_tools.save_lead — lead capture validation + contract. + +Replaces a legacy smoke *script* that called ``save_lead`` at module top level, +which (a) provided zero pytest coverage and (b) appended a fake lead to +``data/leads.json`` every time the suite was merely collected. These tests pin +the validation rules and success contract without writing to disk. 
+""" + +import pytest -# Add project root to path -sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) from tools import crm_tools -# Test 1: Valid Lead -print("Testing valid lead...") -result = crm_tools.save_lead( - user_name="John Doe", - phone_number="555-123-4567", - interest_notes="Looking for a 3 bedroom double wide.", -) -print(f"Result: {result}") - -if result["success"]: - print("SUCCESS: Valid lead accepted.") -else: - print("FAILURE: Valid lead rejected.") - -# Test 2: Invalid Phone -print("\nTesting invalid phone...") -result = crm_tools.save_lead(user_name="Invalid Phone", phone_number="123", interest_notes="test") -print(f"Result: {result}") - -if not result["success"]: - print("SUCCESS: Invalid phone rejected.") -else: - print("FAILURE: Invalid phone accepted.") - -# Test 3: Check local file (if writable) -try: - data_dir = os.path.join(os.path.dirname(__file__), "..", "data") - leads_file = os.path.join(data_dir, "leads.json") - if os.path.exists(leads_file): - with open(leads_file) as f: - leads = json.load(f) - last_lead = leads[-1] - if last_lead["name"] == "John Doe": - print("\nSUCCESS: Lead found in local JSON file.") - else: - print(f"\nFAILURE: Last lead was {last_lead['name']}, expected John Doe.") - else: - print("\nNOTE: leads.json not found (expected if data dir not writable/created).") -except Exception as e: - print(f"\nError checking file: {e}") + +@pytest.fixture(autouse=True) +def _no_disk_writes(monkeypatch): + """Keep save_lead's success path from appending to the repo's data/leads.json. + + save_lead guards its file write behind ``os.access(..., os.W_OK)``; forcing + that False exercises the full structure-and-return logic while skipping the + side effect, so collecting/running tests never pollutes local lead data. 
+ """ + monkeypatch.setattr(crm_tools.os, "access", lambda *a, **k: False) + + +def test_save_lead_valid_returns_success(): + result = crm_tools.save_lead( + user_name="John Doe", + phone_number="555-123-4567", + interest_notes="Looking for a 3 bedroom double wide.", + ) + assert result["success"] is True + # Confirmation echoes the customer's name and number back to the agent. + assert "John Doe" in result["message"] + assert "555-123-4567" in result["message"] + + +def test_save_lead_accepts_formatted_phone(): + result = crm_tools.save_lead( + user_name="Jane Smith", + phone_number="(281) 555-0100", + interest_notes="Financing question", + ) + assert result["success"] is True + + +def test_save_lead_rejects_missing_name(): + result = crm_tools.save_lead( + user_name="", phone_number="555-123-4567", interest_notes="test" + ) + assert result["success"] is False + assert "name" in result["message"].lower() + + +def test_save_lead_rejects_missing_phone(): + result = crm_tools.save_lead( + user_name="No Phone", phone_number="", interest_notes="test" + ) + assert result["success"] is False + + +def test_save_lead_rejects_short_phone(): + result = crm_tools.save_lead( + user_name="Short Phone", phone_number="123", interest_notes="test" + ) + assert result["success"] is False + assert "invalid" in result["message"].lower() or "10" in result["message"] diff --git a/tests/test_form_extraction.py b/tests/test_form_extraction.py new file mode 100644 index 0000000..5f9b672 --- /dev/null +++ b/tests/test_form_extraction.py @@ -0,0 +1,229 @@ +"""Tests for tools/form_extraction.py — PII filtering before LLM calls. + +These tests prove the guardrail in CLAUDE.md: "Never send PII to LLM — strip PII +fields before Gemini API calls." We confirm that: + * SSN / DOB / income field definitions are NOT included in the extraction prompt + that gets sent to Gemini (PII-redaction path). + * Normal (non-PII) fields still flow through to the prompt and the result + (happy path). 
+ * Even if the LLM hallucinates a PII field back, it is dropped from the result. + +The Gemini/genai call is fully mocked — no network calls are made. + +Run: python -m pytest tests/test_form_extraction.py -q +""" + +import asyncio +import sys +from pathlib import Path +from unittest.mock import patch + +import google.genai # noqa: F401 (ensure the real submodule is importable before patching) + +sys.path.insert(0, str(Path(__file__).parent.parent)) + +from config.field_map_loader import get_fields_for_template +from tools import form_extraction + +# Template that mixes PII fields (SSN, Date of Birth) with normal fields. +TEMPLATE = "creditapp.pdf" + +# A conversation transcript that contains raw PII values. The extraction layer +# should never surface these as *fields* to the model, and the field metadata it +# does send must not reference SSN/DOB/income. +CONVERSATION = ( + "user: Hi, I'm John Doe and my employer is Acme Corp.\n" + "user: My SSN is 123-45-6789 and I was born on 01/02/1980.\n" + "user: My monthly income is $5000 and my phone is 555-123-4567." +) + +# PII data-field names (from config/field_map.json) that must never be offered +# to the LLM as extractable fields. +PII_FIELD_NAMES = {"buyer_ssn", "buyer_dob", "buyer_income", "co_buyer_ssn", "co_buyer_dob"} + +# PII labels that appear in the field definitions — they must not leak into the +# prompt either (the prompt lists fields by label). +PII_LABELS = {"SSN", "Date of Birth", "Monthly Income"} + + +class _FakeResponse: + """Mimics the genai generate_content response (exposes a .text attribute).""" + + def __init__(self, text: str): + self.text = text + + +class _FakeModels: + def __init__(self, captured: dict, response_json: str): + self._captured = captured + self._response_json = response_json + + def generate_content(self, model=None, contents=None): + # Capture exactly what would be sent to Gemini so the test can inspect it. 
+ self._captured["model"] = model + self._captured["contents"] = contents + return _FakeResponse(self._response_json) + + +class _FakeClient: + def __init__(self, captured: dict, response_json: str): + self.models = _FakeModels(captured, response_json) + + +def _install_fake_genai(captured: dict, response_json: str): + """Patch `google.genai.Client` so no network call is made. + + `extract_form_data_from_session` does `from google import genai` at call + time and then calls `genai.Client()`. Patching the `Client` symbol on the + real submodule makes the fake visible to that import (a sys.modules patch + alone does not work, because `from google import genai` resolves `genai` as + an attribute of the already-imported `google` package object). + """ + + def _client_factory(*args, **kwargs): + return _FakeClient(captured, response_json) + + return patch("google.genai.Client", _client_factory) + + +def _run(coro): + return asyncio.run(coro) + + +def test_pii_field_definitions_are_stripped_from_llm_prompt(): + """PII-redaction path: the prompt sent to Gemini must not reference any + SSN / DOB / income field name or label.""" + captured: dict = {} + # LLM returns only safe, validated fields. + response_json = '{"buyer_name": "John Doe", "employer_name": "Acme Corp"}' + + with _install_fake_genai(captured, response_json), patch.object( + form_extraction, + "_get_conversation_text", + return_value=CONVERSATION, + ) as _mock_convo: + # Make the patched _get_conversation_text awaitable. + async def _fake_convo(session_id, runner=None): + return CONVERSATION + + _mock_convo.side_effect = _fake_convo + + result = _run( + form_extraction.extract_form_data_from_session( + session_id="sess-1", + template_name=TEMPLATE, + runner=None, + ) + ) + + prompt = captured.get("contents") + assert prompt is not None, "Gemini was never called / prompt not captured" + + # No PII field *name* should appear in the prompt's field list. 
+ for pii_field in PII_FIELD_NAMES: + assert pii_field not in prompt, f"PII field name leaked into LLM prompt: {pii_field}" + + # No PII *label* should appear in the listed extractable fields. We check the + # field-list region of the prompt (the section after "Fields to look for:") + # so the standing safety instruction ("Do NOT extract any SSN, date of + # birth, income...") doesn't trip the assertion. + field_list_region = prompt.split("Conversation:")[0] + fields_to_look_for = field_list_region.split("Fields to look for:")[-1] + for label in PII_LABELS: + assert ( + label not in fields_to_look_for + ), f"PII label leaked into the extractable-field list: {label}" + + # The result itself must not contain any PII field. + extracted = result["extracted_data"] + for pii_field in PII_FIELD_NAMES: + assert pii_field not in extracted, f"PII field present in extracted result: {pii_field}" + + +def test_non_pii_fields_pass_through_to_prompt_and_result(): + """Happy path: normal fields are offered to the model and returned.""" + captured: dict = {} + response_json = '{"buyer_name": "John Doe", "employer_name": "Acme Corp"}' + + with _install_fake_genai(captured, response_json), patch.object( + form_extraction, "_get_conversation_text" + ) as mock_convo: + + async def _fake_convo(session_id, runner=None): + return CONVERSATION + + mock_convo.side_effect = _fake_convo + + result = _run( + form_extraction.extract_form_data_from_session( + session_id="sess-2", + template_name=TEMPLATE, + runner=None, + ) + ) + + prompt = captured["contents"] + # Representative non-PII fields are present in the prompt. + assert "buyer_name" in prompt + assert "employer_name" in prompt + + # And the extracted result carries them through. 
+ extracted = result["extracted_data"] + assert extracted.get("buyer_name") == "John Doe" + assert extracted.get("employer_name") == "Acme Corp" + + +def test_llm_returned_pii_keys_are_dropped_from_result(): + """Defense in depth: even if the model echoes back a PII key, the validation + step drops it because it is not in the safe-field allowlist.""" + captured: dict = {} + # Adversarial: model tries to return an SSN/DOB value. + response_json = ( + '{"buyer_name": "John Doe", "buyer_ssn": "123-45-6789", ' + '"buyer_dob": "01/02/1980"}' + ) + + with _install_fake_genai(captured, response_json), patch.object( + form_extraction, "_get_conversation_text" + ) as mock_convo: + + async def _fake_convo(session_id, runner=None): + return CONVERSATION + + mock_convo.side_effect = _fake_convo + + result = _run( + form_extraction.extract_form_data_from_session( + session_id="sess-3", + template_name=TEMPLATE, + runner=None, + ) + ) + + extracted = result["extracted_data"] + assert "buyer_ssn" not in extracted + assert "buyer_dob" not in extracted + assert extracted.get("buyer_name") == "John Doe" + + +def test_template_safe_fields_exclude_all_pii_definitions(): + """Sanity check on the field registry itself: every field flagged pii=True + for this template is excluded from the safe (LLM-bound) field set, mirroring + the filter inside extract_form_data_from_session.""" + template_fields = get_fields_for_template(TEMPLATE) + pii_in_template = { + name for name, defn in template_fields.items() if defn.get("pii", False) + } + # Template must actually contain PII fields, else the test proves nothing. + assert pii_in_template, "Expected creditapp.pdf to define PII fields" + + safe_fields = { + name: defn + for name, defn in template_fields.items() + if not defn.get("pii", False) + } + # No PII field name survives into the safe set. + assert pii_in_template.isdisjoint(safe_fields.keys()) + # Specifically SSN and DOB are gone. 
+ assert "buyer_ssn" not in safe_fields + assert "buyer_dob" not in safe_fields diff --git a/tests/test_lead_attribution.py b/tests/test_lead_attribution.py new file mode 100644 index 0000000..34561b1 --- /dev/null +++ b/tests/test_lead_attribution.py @@ -0,0 +1,77 @@ +"""Lead-source attribution categorization tests. + +``main._categorize_lead_source`` maps a Lead-like object onto the coarse +buckets the CRM attribution chart consumes. Its precedence is: + + utm_source > referrer > raw source bucket + +The real ``Lead`` dataclass has no ``utm_source`` / ``referrer`` fields yet — +the function reads them defensively with ``getattr(..., None)`` so it stays +crash-free if they ever land. We exercise those branches with a lightweight +``SimpleNamespace`` fake (no Firestore), and cover the raw-source fallbacks +with the actual ``Lead`` dataclass. +""" + +from types import SimpleNamespace + +from lead_management import Lead +from main import _categorize_lead_source + + +def _fake_lead(source=None, utm_source=None, referrer=None): + """A minimal Lead-like object — no Firestore, no dataclass overhead.""" + return SimpleNamespace(source=source, utm_source=utm_source, referrer=referrer) + + +def test_utm_source_takes_priority_and_is_lowercased(): + # utm_source="Instagram" -> "utm:instagram"; utm wins even when a raw + # source and referrer are also present. + lead = _fake_lead(source="chat", utm_source="Instagram", referrer="https://t.co/x") + assert _categorize_lead_source(lead) == "utm:instagram" + + +def test_referrer_falls_back_to_host(): + # No utm -> referrer host (protocol + path stripped). 
+ lead = _fake_lead(referrer="https://www.google.com/search?q=mobile+homes") + assert _categorize_lead_source(lead) == "referrer:www.google.com" + + +def test_referrer_host_is_truncated_to_40_chars(): + host = "a" * 60 + lead = _fake_lead(referrer=f"http://{host}.com/path") + result = _categorize_lead_source(lead) + assert result.startswith("referrer:") + assert result == "referrer:" + ("a" * 40) + + +def test_referrer_with_no_host_becomes_direct(): + lead = _fake_lead(referrer="https://") + assert _categorize_lead_source(lead) == "referrer:direct" + + +def test_known_raw_source_bucket(): + # No utm / referrer -> the raw source bucket. "chat" passes through. + lead = _fake_lead(source="chat") + assert _categorize_lead_source(lead) == "chat" + + +def test_chat_intake_normalized_to_chat(): + lead = _fake_lead(source="chat_intake") + assert _categorize_lead_source(lead) == "chat" + + +def test_empty_source_is_other(): + lead = _fake_lead(source="") + assert _categorize_lead_source(lead) == "other" + + +def test_unknown_raw_source_passes_through_lowercased(): + lead = _fake_lead(source="Facebook_Ad") + assert _categorize_lead_source(lead) == "facebook_ad" + + +def test_real_lead_dataclass_uses_source_bucket(): + # A genuine Lead has no utm_source/referrer attrs; getattr defaults keep + # the function on the raw-source branch without raising AttributeError. + lead = Lead(lead_id="L1", user_id="U1", session_id="S1", source="instagram") + assert _categorize_lead_source(lead) == "instagram" diff --git a/tests/test_social_publishers.py b/tests/test_social_publishers.py new file mode 100644 index 0000000..67cc012 --- /dev/null +++ b/tests/test_social_publishers.py @@ -0,0 +1,207 @@ +"""Tests for the fail-closed social publishing adapters. 
+ +These assert the safety contract of ``tools/social_publishers.py``: +nothing is published unless the explicit ``THO_SOCIAL_PUBLISH_ENABLED`` gate is +on and the required tokens are configured, readiness reporting is accurate, and +the opt-in UTM CTA link builder stays a strict no-op by default. + +All environment access is monkeypatched and the module-level ``requests`` is +replaced with a guard that fails loudly if any test would make a real HTTP call. +""" + +from __future__ import annotations + +import pytest + +from tools import social_publishers + +# Every env var the module reads. We clear all of them per test so the suite is +# hermetic regardless of the developer's shell or CI secrets. +_SOCIAL_ENV_VARS = ( + "THO_SOCIAL_PUBLISH_ENABLED", + "THO_UTM_CTA_ENABLED", + "THO_UTM_SOURCE", + "THO_UTM_MEDIUM", + "PUBLIC_SITE_URL", + "TIKTOK_ACCESS_TOKEN", + "TIKTOK_PRIVACY_LEVEL", + "META_ACCESS_TOKEN", + "META_GRAPH_VERSION", + "INSTAGRAM_BUSINESS_ACCOUNT_ID", +) + + +class _NoHTTP: + """Stand-in for ``requests`` that fails if any HTTP method is invoked.""" + + def __getattr__(self, name: str): + def _boom(*args, **kwargs): # pragma: no cover - only fires on misuse + raise AssertionError( + f"Unexpected real HTTP call: requests.{name}({args!r}, {kwargs!r})" + ) + + return _boom + + +@pytest.fixture(autouse=True) +def _isolate_env_and_block_http(monkeypatch): + """Clear all social env vars and forbid real HTTP for every test.""" + for var in _SOCIAL_ENV_VARS: + monkeypatch.delenv(var, raising=False) + # The module imports ``requests`` at module scope, so patch it there. + monkeypatch.setattr(social_publishers, "requests", _NoHTTP()) + # _canonical_origin() can fall back to config_loader.get_business(); keep the + # CTA tests deterministic by relying only on PUBLIC_SITE_URL (set per test). 
+ return monkeypatch + + +# --------------------------------------------------------------------------- +# (1) prepare_or_publish_social_post returns a draft (no publish) when the +# THO_SOCIAL_PUBLISH_ENABLED gate is unset. +# --------------------------------------------------------------------------- + + +def test_prepare_returns_draft_when_publish_gate_unset(monkeypatch): + # Fully configure tiktok tokens + site URL so the ONLY thing missing is the + # publish gate. This isolates the gate as the reason it stays a draft. + monkeypatch.setenv("TIKTOK_ACCESS_TOKEN", "tok-abc") + monkeypatch.setenv("PUBLIC_SITE_URL", "https://example.com") + # THO_SOCIAL_PUBLISH_ENABLED intentionally left unset. + + result = social_publishers.prepare_or_publish_social_post( + platform="tiktok", + content_type="video", + scheduled_time="2026-07-01T12:00:00", + caption="New listing tour", + hashtags=["#texashomes"], + video_url="https://cdn.example.com/clip.mp4", + ) + + assert result["success"] is True + assert result["status"] == "draft_ready" + assert result["live_integration"] is False + assert result["publish_attempted"] is False + assert result["post_id"].startswith("DRAFT-") + # The blocking reason must point at the disabled publish gate. + assert "THO_SOCIAL_PUBLISH_ENABLED" in result["publish_blocked_reason"] + # readiness embedded in the draft confirms publish is not enabled. + assert result["social_readiness"]["publish_enabled"] is False + + +def test_prepare_draft_does_not_call_requests(monkeypatch): + # Even with tokens present, an unset gate must not reach the network. The + # autouse _NoHTTP guard would raise AssertionError if it did. 
+ monkeypatch.setenv("META_ACCESS_TOKEN", "meta-tok") + monkeypatch.setenv("INSTAGRAM_BUSINESS_ACCOUNT_ID", "ig-123") + monkeypatch.setenv("PUBLIC_SITE_URL", "https://example.com") + + result = social_publishers.prepare_or_publish_social_post( + platform="instagram_reels", + content_type="video", + scheduled_time="2026-07-01T12:00:00", + caption="Reel", + video_url="https://cdn.example.com/reel.mp4", + ) + + assert result["status"] == "draft_ready" + assert result["publish_attempted"] is False + + +# --------------------------------------------------------------------------- +# (2) social_readiness() reports instagram_reels configured=false with the +# correct required_env when tokens are absent, true when present. +# --------------------------------------------------------------------------- + + +def test_social_readiness_instagram_unconfigured_lists_required_env(monkeypatch): + # No META/IG/site tokens set (cleared by autouse fixture). + readiness = social_publishers.social_readiness() + ig = readiness["platforms"]["instagram_reels"] + + assert ig["configured"] is False + # All three inputs are missing, so all three must be reported. + assert set(ig["required_env"]) == { + "META_ACCESS_TOKEN", + "INSTAGRAM_BUSINESS_ACCOUNT_ID", + "PUBLIC_SITE_URL", + } + assert ig["api"] == "Meta Instagram Content Publishing API" + assert readiness["publish_enabled"] is False + + +def test_social_readiness_instagram_partial_lists_only_missing(monkeypatch): + # Token present, account id + site URL still missing. 
+ monkeypatch.setenv("META_ACCESS_TOKEN", "meta-tok") + + ig = social_publishers.social_readiness()["platforms"]["instagram_reels"] + + assert ig["configured"] is False + assert set(ig["required_env"]) == { + "INSTAGRAM_BUSINESS_ACCOUNT_ID", + "PUBLIC_SITE_URL", + } + + +def test_social_readiness_instagram_configured_when_all_present(monkeypatch): + monkeypatch.setenv("META_ACCESS_TOKEN", "meta-tok") + monkeypatch.setenv("INSTAGRAM_BUSINESS_ACCOUNT_ID", "ig-123") + monkeypatch.setenv("PUBLIC_SITE_URL", "https://example.com") + + ig = social_publishers.social_readiness()["platforms"]["instagram_reels"] + + assert ig["configured"] is True + assert ig["required_env"] == [] + + +# --------------------------------------------------------------------------- +# (3) The UTM CTA link builder returns None when THO_UTM_CTA_ENABLED is unset, +# and a correctly-tagged URL when enabled + a canonical origin is present. +# --------------------------------------------------------------------------- + + +def test_utm_cta_link_none_when_gate_unset(monkeypatch): + # Origin present but the opt-in gate is off -> strict no-op. + monkeypatch.setenv("PUBLIC_SITE_URL", "https://example.com") + + assert social_publishers._utm_cta_link("tiktok", "Spring Sale") is None + + +def test_utm_cta_link_none_when_enabled_but_no_origin(monkeypatch): + # Gate on but no resolvable origin -> still no-op. Block the config_loader + # fallback so the absence of PUBLIC_SITE_URL truly means "no origin". 
+ monkeypatch.setenv("THO_UTM_CTA_ENABLED", "true") + monkeypatch.setattr( + social_publishers, "_canonical_origin", lambda: None + ) + + assert social_publishers._utm_cta_link("tiktok", "Spring Sale") is None + + +def test_utm_cta_link_tagged_url_when_enabled_with_origin(monkeypatch): + monkeypatch.setenv("THO_UTM_CTA_ENABLED", "1") + monkeypatch.setenv("PUBLIC_SITE_URL", "https://example.com/") + + link = social_publishers._utm_cta_link("tiktok", "Spring Sale 2026") + + assert link is not None + # Trailing slash on origin is stripped before composing the URL. + assert link.startswith("https://example.com/?") + assert "utm_source=tiktok" in link + assert "utm_medium=social" in link + # Campaign is slugified: lowercased, non-alnum runs -> single hyphen. + assert "utm_campaign=spring-sale-2026" in link + + +def test_utm_cta_link_respects_source_and_medium_overrides(monkeypatch): + monkeypatch.setenv("THO_UTM_CTA_ENABLED", "yes") + monkeypatch.setenv("PUBLIC_SITE_URL", "https://example.com") + monkeypatch.setenv("THO_UTM_SOURCE", "ig") + monkeypatch.setenv("THO_UTM_MEDIUM", "paid_social") + + link = social_publishers._utm_cta_link("instagram_reels", None) + + assert link is not None + assert "utm_source=ig" in link + assert "utm_medium=paid_social" in link + # Empty campaign falls back to the "ad-studio" default token. + assert "utm_campaign=ad-studio" in link From 4a2611c85e5abb8d3135e5a0f345d04f73903f06 Mon Sep 17 00:00:00 2001 From: arigatoexpress <95630102+arigatoexpress@users.noreply.github.com> Date: Tue, 16 Jun 2026 21:05:55 -0600 Subject: [PATCH 2/2] test(attribution): import main via Firestore-stubbed fixture (CI fix) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit test_lead_attribution.py imported `main` at module top, which instantiates a Firestore client at import — failing collection in CI's no-creds "no Firestore/GCS" job (passed locally only because of developer ADC). 
Route the import through create_client(monkeypatch) like the rest of the suite, so main loads with Firestore stubbed. Verified: 9 passed with GCP credentials unset. Co-Authored-By: Claude Opus 4.8 --- tests/test_lead_attribution.py | 61 +++++++++++++++++++++++----------- 1 file changed, 42 insertions(+), 19 deletions(-) diff --git a/tests/test_lead_attribution.py b/tests/test_lead_attribution.py index 34561b1..a0cb3a9 100644 --- a/tests/test_lead_attribution.py +++ b/tests/test_lead_attribution.py @@ -10,12 +10,35 @@ crash-free if they ever land. We exercise those branches with a lightweight ``SimpleNamespace`` fake (no Firestore), and cover the raw-source fallbacks with the actual ``Lead`` dataclass. + +``main`` instantiates a Firestore client at import (``lead_manager = +LeadManager(...)``), which needs GCP credentials the CI "no Firestore/GCS" +job lacks. So ``_categorize_lead_source`` is pulled in via the ``categorize`` +fixture, which first calls ``create_client`` to stub those eager imports the +same way the rest of the suite does — importing ``main`` raw at module top +errors during collection in a creds-less environment. 
""" +import sys +from pathlib import Path from types import SimpleNamespace +import pytest + from lead_management import Lead -from main import _categorize_lead_source + +sys.path.insert(0, str(Path(__file__).parent)) + + +@pytest.fixture +def categorize(monkeypatch): + """Return ``main._categorize_lead_source`` with Firestore stubbed out.""" + from test_api_v1 import create_client + + create_client(monkeypatch) + from main import _categorize_lead_source + + return _categorize_lead_source def _fake_lead(source=None, utm_source=None, referrer=None): @@ -23,55 +46,55 @@ def _fake_lead(source=None, utm_source=None, referrer=None): return SimpleNamespace(source=source, utm_source=utm_source, referrer=referrer) -def test_utm_source_takes_priority_and_is_lowercased(): +def test_utm_source_takes_priority_and_is_lowercased(categorize): # utm_source="Instagram" -> "utm:instagram"; utm wins even when a raw # source and referrer are also present. lead = _fake_lead(source="chat", utm_source="Instagram", referrer="https://t.co/x") - assert _categorize_lead_source(lead) == "utm:instagram" + assert categorize(lead) == "utm:instagram" -def test_referrer_falls_back_to_host(): +def test_referrer_falls_back_to_host(categorize): # No utm -> referrer host (protocol + path stripped). 
lead = _fake_lead(referrer="https://www.google.com/search?q=mobile+homes") - assert _categorize_lead_source(lead) == "referrer:www.google.com" + assert categorize(lead) == "referrer:www.google.com" -def test_referrer_host_is_truncated_to_40_chars(): +def test_referrer_host_is_truncated_to_40_chars(categorize): host = "a" * 60 lead = _fake_lead(referrer=f"http://{host}.com/path") - result = _categorize_lead_source(lead) + result = categorize(lead) assert result.startswith("referrer:") assert result == "referrer:" + ("a" * 40) -def test_referrer_with_no_host_becomes_direct(): +def test_referrer_with_no_host_becomes_direct(categorize): lead = _fake_lead(referrer="https://") - assert _categorize_lead_source(lead) == "referrer:direct" + assert categorize(lead) == "referrer:direct" -def test_known_raw_source_bucket(): +def test_known_raw_source_bucket(categorize): # No utm / referrer -> the raw source bucket. "chat" passes through. lead = _fake_lead(source="chat") - assert _categorize_lead_source(lead) == "chat" + assert categorize(lead) == "chat" -def test_chat_intake_normalized_to_chat(): +def test_chat_intake_normalized_to_chat(categorize): lead = _fake_lead(source="chat_intake") - assert _categorize_lead_source(lead) == "chat" + assert categorize(lead) == "chat" -def test_empty_source_is_other(): +def test_empty_source_is_other(categorize): lead = _fake_lead(source="") - assert _categorize_lead_source(lead) == "other" + assert categorize(lead) == "other" -def test_unknown_raw_source_passes_through_lowercased(): +def test_unknown_raw_source_passes_through_lowercased(categorize): lead = _fake_lead(source="Facebook_Ad") - assert _categorize_lead_source(lead) == "facebook_ad" + assert categorize(lead) == "facebook_ad" -def test_real_lead_dataclass_uses_source_bucket(): +def test_real_lead_dataclass_uses_source_bucket(categorize): # A genuine Lead has no utm_source/referrer attrs; getattr defaults keep # the function on the raw-source branch without raising 
AttributeError. lead = Lead(lead_id="L1", user_id="U1", session_id="S1", source="instagram") - assert _categorize_lead_source(lead) == "instagram" + assert categorize(lead) == "instagram"