diff --git a/caching.py b/caching.py
index 7aaffbd..eece988 100644
--- a/caching.py
+++ b/caching.py
@@ -114,8 +114,8 @@ def cache_delete(key: str):
if client:
try:
client.delete(key)
- except Exception:
- pass
+ except Exception as e:
+ logger.warning(f"Redis delete error for {key}: {e}")
if key in _local_cache:
del _local_cache[key]
diff --git a/mira_routes.py b/mira_routes.py
index 8dbc341..0d231f9 100644
--- a/mira_routes.py
+++ b/mira_routes.py
@@ -97,7 +97,10 @@ def _parse_timestamp(value: Any) -> datetime | None:
if dt.tzinfo is None:
dt = dt.replace(tzinfo=UTC)
return dt.astimezone(UTC)
- except Exception:
+ except Exception as e:
+ struct_logger.warning(
+ "mira timestamp parse failed", value=value, error=str(e)
+ )
return None
return None
@@ -109,7 +112,12 @@ def _count_collection_by_status(collection_name: str, status_field: str = "statu
docs = db.collection(collection_name).stream()
statuses = [doc.to_dict().get(status_field, "UNKNOWN") for doc in docs]
return dict(Counter(statuses))
- except Exception:
+ except Exception as e:
+ struct_logger.warning(
+ "mira collection status count failed",
+ collection=collection_name,
+ error=str(e),
+ )
return {"UNKNOWN": 0}
@@ -609,7 +617,10 @@ async def mira_firestore_collections(request: Request, limit: int = 1000) -> dic
try:
count = len(list(col.limit(limit).stream()))
result.append({"collection": col.id, "count": count})
- except Exception:
+ except Exception as e:
+ struct_logger.warning(
+ "mira collection count failed", collection=col.id, error=str(e)
+ )
result.append({"collection": col.id, "count": None})
return {
"status": "healthy",
diff --git a/seo_routes.py b/seo_routes.py
index e8cd62e..fa76522 100644
--- a/seo_routes.py
+++ b/seo_routes.py
@@ -21,6 +21,7 @@
import html
import json
+import logging
import os
import re
import threading
@@ -44,6 +45,8 @@
router = APIRouter()
+logger = logging.getLogger(__name__)
+
# ── Wiring (set by main.py at startup) ─────────────────────────────────────
_get_homes = None # callable -> list[dict]; the merged public inventory
@@ -197,8 +200,9 @@ def _safe_homes() -> list[dict]:
return []
try:
return _get_homes() or []
- except Exception:
+ except Exception as e:
# SEO surface must never take the page down with it.
+ logger.warning(f"SEO _safe_homes: inventory fetch failed, serving empty: {e}")
return []
@@ -353,7 +357,8 @@ def _product_jsonld(home: dict, canonical_url: str) -> dict | None:
price = home.get("price_value")
try:
price = float(price) if price is not None else None
- except (TypeError, ValueError):
+ except (TypeError, ValueError) as e:
+ logger.warning(f"SEO _product_jsonld: unparseable price_value {price!r}, omitting Product JSON-LD: {e}")
price = None
if not (price and price > 0):
return None
@@ -416,7 +421,8 @@ def _shell() -> str:
content = f.read()
_shell_cache = (mtime, content)
return content
- except OSError:
+ except OSError as e:
+ logger.warning(f"SEO _shell: cannot read index shell {_index_html_path!r}, using minimal fallback: {e}")
return '
'
@@ -1047,7 +1053,8 @@ def render_spa_response(full_path: str) -> Response | None:
to its default file handling. Never raises."""
try:
return _render_spa_response(full_path)
- except Exception:
+ except Exception as e:
+ logger.warning(f"SEO render_spa_response: rendering failed for {full_path!r}, falling through: {e}")
return None
diff --git a/tests/test_contact_lead_capture.py b/tests/test_contact_lead_capture.py
index 25bb05e..9e60ce6 100644
--- a/tests/test_contact_lead_capture.py
+++ b/tests/test_contact_lead_capture.py
@@ -56,3 +56,51 @@ async def boom(_lead):
assert body["success"] is True
# ... but the dropped lead is now loud + alertable.
assert "lead_storage_failed" in body.get("warnings", [])
+
+
+def test_contact_creates_lead_with_name_phone_and_source(monkeypatch):
+ """A valid name+phone POST persists a Lead carrying those fields + source."""
+ client, main, *_ = create_client(monkeypatch)
+ before = len(main.lead_manager.leads)
+
+ body = _post(
+ client, name="Carol", phone="(281) 324-3020", email="carol@example.com"
+ ).json()
+ assert body["success"] is True
+
+ # FakeLeadManager.create_lead appends the persisted Lead to .leads.
+ assert len(main.lead_manager.leads) == before + 1
+ created = main.lead_manager.leads[-1]
+ assert created.name == "Carol"
+ assert created.phone == "(281) 324-3020"
+ assert created.email == "carol@example.com"
+ assert created.source == "contact_form" # default source for the contact form
+
+
+def test_contact_lead_carries_explicit_source(monkeypatch):
+ """A caller-supplied `source` flows through to the persisted Lead."""
+ client, main, *_ = create_client(monkeypatch)
+ before = len(main.lead_manager.leads)
+
+ body = _post(
+ client, name="Dave", phone="2813243020", source="facebook_ad"
+ ).json()
+ assert body["success"] is True
+
+ assert len(main.lead_manager.leads) == before + 1
+ created = main.lead_manager.leads[-1]
+ assert created.name == "Dave"
+ assert created.phone == "2813243020"
+ assert created.source == "facebook_ad"
+
+
+def test_contact_invalid_phone_rejected_and_creates_no_lead(monkeypatch):
+ """An invalid/short phone is rejected (success=false) and no Lead is stored."""
+ client, main, *_ = create_client(monkeypatch)
+ before = len(main.lead_manager.leads)
+
+ body = _post(client, name="Eve", phone="55512").json()
+ assert body["success"] is False
+ assert "error" in body
+ # Rejection happens before lead creation, so nothing is persisted.
+ assert len(main.lead_manager.leads) == before
diff --git a/tests/test_crm.py b/tests/test_crm.py
index 9f48573..39de6e4 100644
--- a/tests/test_crm.py
+++ b/tests/test_crm.py
@@ -1,48 +1,66 @@
-import json
-import os
-import sys
+"""Tests for tools.crm_tools.save_lead — lead capture validation + contract.
+
+Replaces a legacy smoke *script* that called ``save_lead`` at module top level,
+which (a) provided zero pytest coverage and (b) appended a fake lead to
+``data/leads.json`` every time the suite was merely collected. These tests pin
+the validation rules and success contract without writing to disk.
+"""
+
+import pytest
-# Add project root to path
-sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
from tools import crm_tools
-# Test 1: Valid Lead
-print("Testing valid lead...")
-result = crm_tools.save_lead(
- user_name="John Doe",
- phone_number="555-123-4567",
- interest_notes="Looking for a 3 bedroom double wide.",
-)
-print(f"Result: {result}")
-
-if result["success"]:
- print("SUCCESS: Valid lead accepted.")
-else:
- print("FAILURE: Valid lead rejected.")
-
-# Test 2: Invalid Phone
-print("\nTesting invalid phone...")
-result = crm_tools.save_lead(user_name="Invalid Phone", phone_number="123", interest_notes="test")
-print(f"Result: {result}")
-
-if not result["success"]:
- print("SUCCESS: Invalid phone rejected.")
-else:
- print("FAILURE: Invalid phone accepted.")
-
-# Test 3: Check local file (if writable)
-try:
- data_dir = os.path.join(os.path.dirname(__file__), "..", "data")
- leads_file = os.path.join(data_dir, "leads.json")
- if os.path.exists(leads_file):
- with open(leads_file) as f:
- leads = json.load(f)
- last_lead = leads[-1]
- if last_lead["name"] == "John Doe":
- print("\nSUCCESS: Lead found in local JSON file.")
- else:
- print(f"\nFAILURE: Last lead was {last_lead['name']}, expected John Doe.")
- else:
- print("\nNOTE: leads.json not found (expected if data dir not writable/created).")
-except Exception as e:
- print(f"\nError checking file: {e}")
+
+@pytest.fixture(autouse=True)
+def _no_disk_writes(monkeypatch):
+ """Keep save_lead's success path from appending to the repo's data/leads.json.
+
+ save_lead guards its file write behind ``os.access(..., os.W_OK)``; forcing
+ that False exercises the full structure-and-return logic while skipping the
+ side effect, so collecting/running tests never pollutes local lead data.
+ """
+ monkeypatch.setattr(crm_tools.os, "access", lambda *a, **k: False)
+
+
+def test_save_lead_valid_returns_success():
+ result = crm_tools.save_lead(
+ user_name="John Doe",
+ phone_number="555-123-4567",
+ interest_notes="Looking for a 3 bedroom double wide.",
+ )
+ assert result["success"] is True
+ # Confirmation echoes the customer's name and number back to the agent.
+ assert "John Doe" in result["message"]
+ assert "555-123-4567" in result["message"]
+
+
+def test_save_lead_accepts_formatted_phone():
+ result = crm_tools.save_lead(
+ user_name="Jane Smith",
+ phone_number="(281) 555-0100",
+ interest_notes="Financing question",
+ )
+ assert result["success"] is True
+
+
+def test_save_lead_rejects_missing_name():
+ result = crm_tools.save_lead(
+ user_name="", phone_number="555-123-4567", interest_notes="test"
+ )
+ assert result["success"] is False
+ assert "name" in result["message"].lower()
+
+
+def test_save_lead_rejects_missing_phone():
+ result = crm_tools.save_lead(
+ user_name="No Phone", phone_number="", interest_notes="test"
+ )
+ assert result["success"] is False
+
+
+def test_save_lead_rejects_short_phone():
+ result = crm_tools.save_lead(
+ user_name="Short Phone", phone_number="123", interest_notes="test"
+ )
+ assert result["success"] is False
+ assert "invalid" in result["message"].lower() or "10" in result["message"]
diff --git a/tests/test_form_extraction.py b/tests/test_form_extraction.py
new file mode 100644
index 0000000..5f9b672
--- /dev/null
+++ b/tests/test_form_extraction.py
@@ -0,0 +1,229 @@
+"""Tests for tools/form_extraction.py — PII filtering before LLM calls.
+
+These tests prove the guardrail in CLAUDE.md: "Never send PII to LLM — strip PII
+fields before Gemini API calls." We confirm that:
+ * SSN / DOB / income field definitions are NOT included in the extraction prompt
+ that gets sent to Gemini (PII-redaction path).
+ * Normal (non-PII) fields still flow through to the prompt and the result
+ (happy path).
+ * Even if the LLM hallucinates a PII field back, it is dropped from the result.
+
+The Gemini/genai call is fully mocked — no network calls are made.
+
+Run: python -m pytest tests/test_form_extraction.py -q
+"""
+
+import asyncio
+import sys
+from pathlib import Path
+from unittest.mock import patch
+
+import google.genai # noqa: F401 (ensure the real submodule is importable before patching)
+
+sys.path.insert(0, str(Path(__file__).parent.parent))
+
+from config.field_map_loader import get_fields_for_template
+from tools import form_extraction
+
+# Template that mixes PII fields (SSN, Date of Birth) with normal fields.
+TEMPLATE = "creditapp.pdf"
+
+# A conversation transcript that contains raw PII values. The extraction layer
+# should never surface these as *fields* to the model, and the field metadata it
+# does send must not reference SSN/DOB/income.
+CONVERSATION = (
+ "user: Hi, I'm John Doe and my employer is Acme Corp.\n"
+ "user: My SSN is 123-45-6789 and I was born on 01/02/1980.\n"
+ "user: My monthly income is $5000 and my phone is 555-123-4567."
+)
+
+# PII data-field names (from config/field_map.json) that must never be offered
+# to the LLM as extractable fields.
+PII_FIELD_NAMES = {"buyer_ssn", "buyer_dob", "buyer_income", "co_buyer_ssn", "co_buyer_dob"}
+
+# PII labels that appear in the field definitions — they must not leak into the
+# prompt either (the prompt lists fields by label).
+PII_LABELS = {"SSN", "Date of Birth", "Monthly Income"}
+
+
+class _FakeResponse:
+ """Mimics the genai generate_content response (exposes a .text attribute)."""
+
+ def __init__(self, text: str):
+ self.text = text
+
+
+class _FakeModels:
+ def __init__(self, captured: dict, response_json: str):
+ self._captured = captured
+ self._response_json = response_json
+
+ def generate_content(self, model=None, contents=None):
+ # Capture exactly what would be sent to Gemini so the test can inspect it.
+ self._captured["model"] = model
+ self._captured["contents"] = contents
+ return _FakeResponse(self._response_json)
+
+
+class _FakeClient:
+ def __init__(self, captured: dict, response_json: str):
+ self.models = _FakeModels(captured, response_json)
+
+
+def _install_fake_genai(captured: dict, response_json: str):
+ """Patch `google.genai.Client` so no network call is made.
+
+ `extract_form_data_from_session` does `from google import genai` at call
+ time and then calls `genai.Client()`. Patching the `Client` symbol on the
+ real submodule makes the fake visible to that import (a sys.modules patch
+ alone does not work, because `from google import genai` resolves `genai` as
+ an attribute of the already-imported `google` package object).
+ """
+
+ def _client_factory(*args, **kwargs):
+ return _FakeClient(captured, response_json)
+
+ return patch("google.genai.Client", _client_factory)
+
+
+def _run(coro):
+ return asyncio.run(coro)
+
+
+def test_pii_field_definitions_are_stripped_from_llm_prompt():
+ """PII-redaction path: the prompt sent to Gemini must not reference any
+ SSN / DOB / income field name or label."""
+ captured: dict = {}
+ # LLM returns only safe, validated fields.
+ response_json = '{"buyer_name": "John Doe", "employer_name": "Acme Corp"}'
+
+ with _install_fake_genai(captured, response_json), patch.object(
+ form_extraction,
+ "_get_conversation_text",
+ return_value=CONVERSATION,
+ ) as _mock_convo:
+ # Make the patched _get_conversation_text awaitable.
+ async def _fake_convo(session_id, runner=None):
+ return CONVERSATION
+
+ _mock_convo.side_effect = _fake_convo
+
+ result = _run(
+ form_extraction.extract_form_data_from_session(
+ session_id="sess-1",
+ template_name=TEMPLATE,
+ runner=None,
+ )
+ )
+
+ prompt = captured.get("contents")
+ assert prompt is not None, "Gemini was never called / prompt not captured"
+
+ # No PII field *name* should appear in the prompt's field list.
+ for pii_field in PII_FIELD_NAMES:
+ assert pii_field not in prompt, f"PII field name leaked into LLM prompt: {pii_field}"
+
+ # No PII *label* should appear in the listed extractable fields. We check the
+ # field-list region of the prompt (the section after "Fields to look for:")
+ # so the standing safety instruction ("Do NOT extract any SSN, date of
+ # birth, income...") doesn't trip the assertion.
+ field_list_region = prompt.split("Conversation:")[0]
+ fields_to_look_for = field_list_region.split("Fields to look for:")[-1]
+ for label in PII_LABELS:
+ assert (
+ label not in fields_to_look_for
+ ), f"PII label leaked into the extractable-field list: {label}"
+
+ # The result itself must not contain any PII field.
+ extracted = result["extracted_data"]
+ for pii_field in PII_FIELD_NAMES:
+ assert pii_field not in extracted, f"PII field present in extracted result: {pii_field}"
+
+
+def test_non_pii_fields_pass_through_to_prompt_and_result():
+ """Happy path: normal fields are offered to the model and returned."""
+ captured: dict = {}
+ response_json = '{"buyer_name": "John Doe", "employer_name": "Acme Corp"}'
+
+ with _install_fake_genai(captured, response_json), patch.object(
+ form_extraction, "_get_conversation_text"
+ ) as mock_convo:
+
+ async def _fake_convo(session_id, runner=None):
+ return CONVERSATION
+
+ mock_convo.side_effect = _fake_convo
+
+ result = _run(
+ form_extraction.extract_form_data_from_session(
+ session_id="sess-2",
+ template_name=TEMPLATE,
+ runner=None,
+ )
+ )
+
+ prompt = captured["contents"]
+ # Representative non-PII fields are present in the prompt.
+ assert "buyer_name" in prompt
+ assert "employer_name" in prompt
+
+ # And the extracted result carries them through.
+ extracted = result["extracted_data"]
+ assert extracted.get("buyer_name") == "John Doe"
+ assert extracted.get("employer_name") == "Acme Corp"
+
+
+def test_llm_returned_pii_keys_are_dropped_from_result():
+ """Defense in depth: even if the model echoes back a PII key, the validation
+ step drops it because it is not in the safe-field allowlist."""
+ captured: dict = {}
+ # Adversarial: model tries to return an SSN/DOB value.
+ response_json = (
+ '{"buyer_name": "John Doe", "buyer_ssn": "123-45-6789", '
+ '"buyer_dob": "01/02/1980"}'
+ )
+
+ with _install_fake_genai(captured, response_json), patch.object(
+ form_extraction, "_get_conversation_text"
+ ) as mock_convo:
+
+ async def _fake_convo(session_id, runner=None):
+ return CONVERSATION
+
+ mock_convo.side_effect = _fake_convo
+
+ result = _run(
+ form_extraction.extract_form_data_from_session(
+ session_id="sess-3",
+ template_name=TEMPLATE,
+ runner=None,
+ )
+ )
+
+ extracted = result["extracted_data"]
+ assert "buyer_ssn" not in extracted
+ assert "buyer_dob" not in extracted
+ assert extracted.get("buyer_name") == "John Doe"
+
+
+def test_template_safe_fields_exclude_all_pii_definitions():
+ """Sanity check on the field registry itself: every field flagged pii=True
+ for this template is excluded from the safe (LLM-bound) field set, mirroring
+ the filter inside extract_form_data_from_session."""
+ template_fields = get_fields_for_template(TEMPLATE)
+ pii_in_template = {
+ name for name, defn in template_fields.items() if defn.get("pii", False)
+ }
+ # Template must actually contain PII fields, else the test proves nothing.
+ assert pii_in_template, "Expected creditapp.pdf to define PII fields"
+
+ safe_fields = {
+ name: defn
+ for name, defn in template_fields.items()
+ if not defn.get("pii", False)
+ }
+ # No PII field name survives into the safe set.
+ assert pii_in_template.isdisjoint(safe_fields.keys())
+ # Specifically SSN and DOB are gone.
+ assert "buyer_ssn" not in safe_fields
+ assert "buyer_dob" not in safe_fields
diff --git a/tests/test_lead_attribution.py b/tests/test_lead_attribution.py
new file mode 100644
index 0000000..a0cb3a9
--- /dev/null
+++ b/tests/test_lead_attribution.py
@@ -0,0 +1,100 @@
+"""Lead-source attribution categorization tests.
+
+``main._categorize_lead_source`` maps a Lead-like object onto the coarse
+buckets the CRM attribution chart consumes. Its precedence is:
+
+ utm_source > referrer > raw source bucket
+
+The real ``Lead`` dataclass has no ``utm_source`` / ``referrer`` fields yet —
+the function reads them defensively with ``getattr(..., None)`` so it stays
+crash-free if they ever land. We exercise those branches with a lightweight
+``SimpleNamespace`` fake (no Firestore), and cover the raw-source fallbacks
+with the actual ``Lead`` dataclass.
+
+``main`` instantiates a Firestore client at import (``lead_manager =
+LeadManager(...)``), which needs GCP credentials the CI "no Firestore/GCS"
+job lacks. So ``_categorize_lead_source`` is pulled in via the ``categorize``
+fixture, which first calls ``create_client`` to stub those eager imports the
+same way the rest of the suite does — importing ``main`` raw at module top
+errors during collection in a creds-less environment.
+"""
+
+import sys
+from pathlib import Path
+from types import SimpleNamespace
+
+import pytest
+
+from lead_management import Lead
+
+sys.path.insert(0, str(Path(__file__).parent))
+
+
+@pytest.fixture
+def categorize(monkeypatch):
+ """Return ``main._categorize_lead_source`` with Firestore stubbed out."""
+ from test_api_v1 import create_client
+
+ create_client(monkeypatch)
+ from main import _categorize_lead_source
+
+ return _categorize_lead_source
+
+
+def _fake_lead(source=None, utm_source=None, referrer=None):
+ """A minimal Lead-like object — no Firestore, no dataclass overhead."""
+ return SimpleNamespace(source=source, utm_source=utm_source, referrer=referrer)
+
+
+def test_utm_source_takes_priority_and_is_lowercased(categorize):
+ # utm_source="Instagram" -> "utm:instagram"; utm wins even when a raw
+ # source and referrer are also present.
+ lead = _fake_lead(source="chat", utm_source="Instagram", referrer="https://t.co/x")
+ assert categorize(lead) == "utm:instagram"
+
+
+def test_referrer_falls_back_to_host(categorize):
+ # No utm -> referrer host (protocol + path stripped).
+ lead = _fake_lead(referrer="https://www.google.com/search?q=mobile+homes")
+ assert categorize(lead) == "referrer:www.google.com"
+
+
+def test_referrer_host_is_truncated_to_40_chars(categorize):
+ host = "a" * 60
+ lead = _fake_lead(referrer=f"http://{host}.com/path")
+ result = categorize(lead)
+ assert result.startswith("referrer:")
+ assert result == "referrer:" + ("a" * 40)
+
+
+def test_referrer_with_no_host_becomes_direct(categorize):
+ lead = _fake_lead(referrer="https://")
+ assert categorize(lead) == "referrer:direct"
+
+
+def test_known_raw_source_bucket(categorize):
+ # No utm / referrer -> the raw source bucket. "chat" passes through.
+ lead = _fake_lead(source="chat")
+ assert categorize(lead) == "chat"
+
+
+def test_chat_intake_normalized_to_chat(categorize):
+ lead = _fake_lead(source="chat_intake")
+ assert categorize(lead) == "chat"
+
+
+def test_empty_source_is_other(categorize):
+ lead = _fake_lead(source="")
+ assert categorize(lead) == "other"
+
+
+def test_unknown_raw_source_passes_through_lowercased(categorize):
+ lead = _fake_lead(source="Facebook_Ad")
+ assert categorize(lead) == "facebook_ad"
+
+
+def test_real_lead_dataclass_uses_source_bucket(categorize):
+ # A genuine Lead has no utm_source/referrer attrs; getattr defaults keep
+ # the function on the raw-source branch without raising AttributeError.
+ lead = Lead(lead_id="L1", user_id="U1", session_id="S1", source="instagram")
+ assert categorize(lead) == "instagram"
diff --git a/tests/test_social_publishers.py b/tests/test_social_publishers.py
new file mode 100644
index 0000000..67cc012
--- /dev/null
+++ b/tests/test_social_publishers.py
@@ -0,0 +1,207 @@
+"""Tests for the fail-closed social publishing adapters.
+
+These assert the safety contract of ``tools/social_publishers.py``:
+nothing is published unless the explicit ``THO_SOCIAL_PUBLISH_ENABLED`` gate is
+on and the required tokens are configured, readiness reporting is accurate, and
+the opt-in UTM CTA link builder stays a strict no-op by default.
+
+All environment access is monkeypatched and the module-level ``requests`` is
+replaced with a guard that fails loudly if any test would make a real HTTP call.
+"""
+
+from __future__ import annotations
+
+import pytest
+
+from tools import social_publishers
+
+# Every env var the module reads. We clear all of them per test so the suite is
+# hermetic regardless of the developer's shell or CI secrets.
+_SOCIAL_ENV_VARS = (
+ "THO_SOCIAL_PUBLISH_ENABLED",
+ "THO_UTM_CTA_ENABLED",
+ "THO_UTM_SOURCE",
+ "THO_UTM_MEDIUM",
+ "PUBLIC_SITE_URL",
+ "TIKTOK_ACCESS_TOKEN",
+ "TIKTOK_PRIVACY_LEVEL",
+ "META_ACCESS_TOKEN",
+ "META_GRAPH_VERSION",
+ "INSTAGRAM_BUSINESS_ACCOUNT_ID",
+)
+
+
+class _NoHTTP:
+ """Stand-in for ``requests`` that fails if any HTTP method is invoked."""
+
+ def __getattr__(self, name: str):
+ def _boom(*args, **kwargs): # pragma: no cover - only fires on misuse
+ raise AssertionError(
+ f"Unexpected real HTTP call: requests.{name}({args!r}, {kwargs!r})"
+ )
+
+ return _boom
+
+
+@pytest.fixture(autouse=True)
+def _isolate_env_and_block_http(monkeypatch):
+ """Clear all social env vars and forbid real HTTP for every test."""
+ for var in _SOCIAL_ENV_VARS:
+ monkeypatch.delenv(var, raising=False)
+ # The module imports ``requests`` at module scope, so patch it there.
+ monkeypatch.setattr(social_publishers, "requests", _NoHTTP())
+ # _canonical_origin() can fall back to config_loader.get_business(); keep the
+ # CTA tests deterministic by relying only on PUBLIC_SITE_URL (set per test).
+ return monkeypatch
+
+
+# ---------------------------------------------------------------------------
+# (1) prepare_or_publish_social_post returns a draft (no publish) when the
+# THO_SOCIAL_PUBLISH_ENABLED gate is unset.
+# ---------------------------------------------------------------------------
+
+
+def test_prepare_returns_draft_when_publish_gate_unset(monkeypatch):
+ # Fully configure tiktok tokens + site URL so the ONLY thing missing is the
+ # publish gate. This isolates the gate as the reason it stays a draft.
+ monkeypatch.setenv("TIKTOK_ACCESS_TOKEN", "tok-abc")
+ monkeypatch.setenv("PUBLIC_SITE_URL", "https://example.com")
+ # THO_SOCIAL_PUBLISH_ENABLED intentionally left unset.
+
+ result = social_publishers.prepare_or_publish_social_post(
+ platform="tiktok",
+ content_type="video",
+ scheduled_time="2026-07-01T12:00:00",
+ caption="New listing tour",
+ hashtags=["#texashomes"],
+ video_url="https://cdn.example.com/clip.mp4",
+ )
+
+ assert result["success"] is True
+ assert result["status"] == "draft_ready"
+ assert result["live_integration"] is False
+ assert result["publish_attempted"] is False
+ assert result["post_id"].startswith("DRAFT-")
+ # The blocking reason must point at the disabled publish gate.
+ assert "THO_SOCIAL_PUBLISH_ENABLED" in result["publish_blocked_reason"]
+ # readiness embedded in the draft confirms publish is not enabled.
+ assert result["social_readiness"]["publish_enabled"] is False
+
+
+def test_prepare_draft_does_not_call_requests(monkeypatch):
+ # Even with tokens present, an unset gate must not reach the network. The
+ # autouse _NoHTTP guard would raise AssertionError if it did.
+ monkeypatch.setenv("META_ACCESS_TOKEN", "meta-tok")
+ monkeypatch.setenv("INSTAGRAM_BUSINESS_ACCOUNT_ID", "ig-123")
+ monkeypatch.setenv("PUBLIC_SITE_URL", "https://example.com")
+
+ result = social_publishers.prepare_or_publish_social_post(
+ platform="instagram_reels",
+ content_type="video",
+ scheduled_time="2026-07-01T12:00:00",
+ caption="Reel",
+ video_url="https://cdn.example.com/reel.mp4",
+ )
+
+ assert result["status"] == "draft_ready"
+ assert result["publish_attempted"] is False
+
+
+# ---------------------------------------------------------------------------
+# (2) social_readiness() reports instagram_reels configured=false with the
+# correct required_env when tokens are absent, true when present.
+# ---------------------------------------------------------------------------
+
+
+def test_social_readiness_instagram_unconfigured_lists_required_env(monkeypatch):
+ # No META/IG/site tokens set (cleared by autouse fixture).
+ readiness = social_publishers.social_readiness()
+ ig = readiness["platforms"]["instagram_reels"]
+
+ assert ig["configured"] is False
+ # All three inputs are missing, so all three must be reported.
+ assert set(ig["required_env"]) == {
+ "META_ACCESS_TOKEN",
+ "INSTAGRAM_BUSINESS_ACCOUNT_ID",
+ "PUBLIC_SITE_URL",
+ }
+ assert ig["api"] == "Meta Instagram Content Publishing API"
+ assert readiness["publish_enabled"] is False
+
+
+def test_social_readiness_instagram_partial_lists_only_missing(monkeypatch):
+ # Token present, account id + site URL still missing.
+ monkeypatch.setenv("META_ACCESS_TOKEN", "meta-tok")
+
+ ig = social_publishers.social_readiness()["platforms"]["instagram_reels"]
+
+ assert ig["configured"] is False
+ assert set(ig["required_env"]) == {
+ "INSTAGRAM_BUSINESS_ACCOUNT_ID",
+ "PUBLIC_SITE_URL",
+ }
+
+
+def test_social_readiness_instagram_configured_when_all_present(monkeypatch):
+ monkeypatch.setenv("META_ACCESS_TOKEN", "meta-tok")
+ monkeypatch.setenv("INSTAGRAM_BUSINESS_ACCOUNT_ID", "ig-123")
+ monkeypatch.setenv("PUBLIC_SITE_URL", "https://example.com")
+
+ ig = social_publishers.social_readiness()["platforms"]["instagram_reels"]
+
+ assert ig["configured"] is True
+ assert ig["required_env"] == []
+
+
+# ---------------------------------------------------------------------------
+# (3) The UTM CTA link builder returns None when THO_UTM_CTA_ENABLED is unset,
+# and a correctly-tagged URL when enabled + a canonical origin is present.
+# ---------------------------------------------------------------------------
+
+
+def test_utm_cta_link_none_when_gate_unset(monkeypatch):
+ # Origin present but the opt-in gate is off -> strict no-op.
+ monkeypatch.setenv("PUBLIC_SITE_URL", "https://example.com")
+
+ assert social_publishers._utm_cta_link("tiktok", "Spring Sale") is None
+
+
+def test_utm_cta_link_none_when_enabled_but_no_origin(monkeypatch):
+ # Gate on but no resolvable origin -> still no-op. Block the config_loader
+ # fallback so the absence of PUBLIC_SITE_URL truly means "no origin".
+ monkeypatch.setenv("THO_UTM_CTA_ENABLED", "true")
+ monkeypatch.setattr(
+ social_publishers, "_canonical_origin", lambda: None
+ )
+
+ assert social_publishers._utm_cta_link("tiktok", "Spring Sale") is None
+
+
+def test_utm_cta_link_tagged_url_when_enabled_with_origin(monkeypatch):
+ monkeypatch.setenv("THO_UTM_CTA_ENABLED", "1")
+ monkeypatch.setenv("PUBLIC_SITE_URL", "https://example.com/")
+
+ link = social_publishers._utm_cta_link("tiktok", "Spring Sale 2026")
+
+ assert link is not None
+ # Trailing slash on origin is stripped before composing the URL.
+ assert link.startswith("https://example.com/?")
+ assert "utm_source=tiktok" in link
+ assert "utm_medium=social" in link
+ # Campaign is slugified: lowercased, non-alnum runs -> single hyphen.
+ assert "utm_campaign=spring-sale-2026" in link
+
+
+def test_utm_cta_link_respects_source_and_medium_overrides(monkeypatch):
+ monkeypatch.setenv("THO_UTM_CTA_ENABLED", "yes")
+ monkeypatch.setenv("PUBLIC_SITE_URL", "https://example.com")
+ monkeypatch.setenv("THO_UTM_SOURCE", "ig")
+ monkeypatch.setenv("THO_UTM_MEDIUM", "paid_social")
+
+ link = social_publishers._utm_cta_link("instagram_reels", None)
+
+ assert link is not None
+ assert "utm_source=ig" in link
+ assert "utm_medium=paid_social" in link
+ # Empty campaign falls back to the "ad-studio" default token.
+ assert "utm_campaign=ad-studio" in link