forked from HolmesGPT/holmesgpt
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathconftest.py
More file actions
294 lines (259 loc) · 11.9 KB
/
conftest.py
File metadata and controls
294 lines (259 loc) · 11.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
import json
import logging
import os
import re
import urllib.parse
import urllib.request
from unittest.mock import MagicMock, patch
import pytest
import responses as responses_
from holmes.core.tracing import get_active_branch_name, readable_timestamp
from tests.llm.conftest import show_llm_summary_report
from tests.llm.utils.braintrust import get_braintrust_url
def pytest_addoption(parser):
    """Register the custom command line options used by the eval/test harness.

    Each entry is (flag, kwargs-for-addoption); options are registered in
    declaration order. Paired --X / --no-X flags share a ``dest`` so the
    negative flag overrides the positive default.
    """
    option_specs = [
        (
            "--skip-setup",
            dict(
                action="store_true",
                default=False,
                help="Skip running before_test commands for test cases (useful for iterative test development)",
            ),
        ),
        (
            "--skip-cleanup",
            dict(
                action="store_true",
                default=False,
                help="Skip running after_test commands for test cases (useful for debugging test failures)",
            ),
        ),
        (
            "--only-setup",
            dict(
                action="store_true",
                default=False,
                help="Only run before_test setup commands, skip the actual test execution",
            ),
        ),
        (
            "--only-cleanup",
            dict(
                action="store_true",
                default=False,
                help="Only run after_test cleanup commands, skip the actual test execution",
            ),
        ),
        (
            "--retry-on-throttle",
            dict(
                action="store_true",
                default=True,
                help="Retry tests when encountering API throttling/overload errors (default: True)",
            ),
        ),
        (
            "--no-retry-on-throttle",
            dict(
                action="store_false",
                dest="retry_on_throttle",
                help="Disable retrying tests on API throttling/overload errors",
            ),
        ),
        (
            "--include-intermediate",
            dict(
                action="store_true",
                default=True,
                help="Include intermediate LLM outputs when evaluating correctness (default: True)",
            ),
        ),
        (
            "--no-include-intermediate",
            dict(
                action="store_false",
                dest="include_intermediate",
                help="Exclude intermediate LLM outputs from evaluation (only use final answer)",
            ),
        ),
        (
            "--strict-setup-mode",
            dict(
                type=str,
                default="false",
                choices=["true", "false"],
                help="Fail entire pytest run if any test setup fails (true/false, default: false) - useful for benchmarks to ensure all tests run properly",
            ),
        ),
        (
            "--strict-setup-exceptions",
            dict(
                type=str,
                default="",
                help="Comma-separated list of test IDs that are allowed to have setup failures even with --strict-setup-mode",
            ),
        ),
        (
            "--additional-system-prompt-url",
            dict(
                action="store",
                default=None,
                help="URL returning the additional system prompt text or JSON (expects 'additional_system_prompt' field)",
            ),
        ),
    ]
    for flag, kwargs in option_specs:
        parser.addoption(flag, **kwargs)
def _disable_ssl_verification():
    """Disable TLS certificate verification in litellm and the OpenAI client.

    Useful for environments with TLS-interception proxies. Both third-party
    imports are optional: if a library is not installed there is nothing to
    patch, so ImportError is deliberately swallowed.
    """
    try:
        import litellm

        litellm.ssl_verify = False
    except ImportError:
        pass
    # Patch OpenAI's client so its underlying httpx client skips verification,
    # unless the caller already supplies an explicit http_client.
    try:
        import httpx
        import openai

        _original_openai_init = openai.OpenAI.__init__

        def _patched_openai_init(self, *args, **kwargs):
            if "http_client" not in kwargs:
                kwargs["http_client"] = httpx.Client(verify=False)
            return _original_openai_init(self, *args, **kwargs)

        openai.OpenAI.__init__ = _patched_openai_init
    except ImportError:
        pass


def _derive_confluence_sa_base_url():
    """Auto-derive CONFLUENCE_SA_BASE_URL from CONFLUENCE_BASE_URL if not set.

    SA tokens require the Atlassian Cloud API gateway
    (api.atlassian.com/ex/confluence/{cloudId}/...) rather than the direct
    instance URL; the cloud ID is discovered via the public tenant_info
    endpoint. Best-effort: any failure only logs a warning.
    """
    confluence_base = os.environ.get("CONFLUENCE_BASE_URL")
    if not confluence_base or os.environ.get("CONFLUENCE_SA_BASE_URL"):
        return
    parsed = urllib.parse.urlparse(confluence_base)
    if parsed.scheme not in ("http", "https"):
        logging.warning(
            f"CONFLUENCE_BASE_URL has unsupported scheme '{parsed.scheme}', skipping SA URL derivation"
        )
        return
    try:
        tenant_url = f"{confluence_base.rstrip('/')}/_edge/tenant_info"
        with urllib.request.urlopen(tenant_url, timeout=10) as resp:
            cloud_id = json.loads(resp.read())["cloudId"]
        os.environ["CONFLUENCE_SA_BASE_URL"] = (
            f"https://api.atlassian.com/ex/confluence/{cloud_id}"
        )
        logging.info(f"Auto-derived CONFLUENCE_SA_BASE_URL from cloud ID {cloud_id}")
    except Exception as e:
        logging.warning(f"Could not auto-derive CONFLUENCE_SA_BASE_URL: {e}")


def _suppress_noisy_loggers():
    """Silence chatty third-party loggers during testing."""
    # LiteLLM logs under several logger names (including a separate verbose
    # logger and sub-loggers); quiet them all.
    logging.getLogger("LiteLLM").setLevel(logging.ERROR)
    logging.getLogger("LiteLLM.verbose_logger").setLevel(logging.ERROR)
    logging.getLogger("litellm").setLevel(logging.ERROR)
    logging.getLogger("litellm.cost_calculator").setLevel(logging.ERROR)
    logging.getLogger("litellm.litellm_core_utils").setLevel(logging.ERROR)
    logging.getLogger("litellm.litellm_core_utils.litellm_logging").setLevel(
        logging.ERROR
    )
    # Suppress per-request httpx/httpcore HTTP logs.
    logging.getLogger("httpx").setLevel(logging.WARNING)
    logging.getLogger("httpcore").setLevel(logging.WARNING)


def _set_experiment_id(config):
    """Set the EXPERIMENT_ID env var for eval tracking unless already provided.

    The ID combines the local username, the active git branch, any pytest
    -k/-m filter expressions, and a timestamp, so filtered runs get
    distinguishable experiment names.
    """
    if os.getenv("EXPERIMENT_ID"):
        return
    try:
        username = os.getlogin()
    except OSError:
        # os.getlogin() fails in environments without a terminal (e.g., GitHub Actions)
        username = os.getenv("USER", "ci")
    git_branch = get_active_branch_name()
    # Include pytest parameters in experiment ID
    params = []
    if config.option.keyword:
        params.append(f"k={config.option.keyword}")
    if config.option.markexpr:
        params.append(f"m={config.option.markexpr}")
    params_str = f"-{'-'.join(params)}" if params else ""
    os.environ["EXPERIMENT_ID"] = (
        f"{username}-{git_branch}{params_str}-{readable_timestamp()}"
    )


def pytest_configure(config):
    """Configure pytest: optional SSL bypass, Confluence SA URL derivation,
    noisy-logger suppression, and experiment ID generation.

    NOTE: for per-worker log files under pytest-xdist, see
    https://pytest-xdist.readthedocs.io/en/latest/how-to.html#creating-one-log-file-for-each-worker
    """
    # Disable SSL verification if SSL_VERIFY env var is set to false/0.
    # This is useful for running tests in environments with TLS interception proxies.
    if os.environ.get("SSL_VERIFY", "true").lower() in ("false", "0", "no"):
        _disable_ssl_verification()
    _derive_confluence_sa_base_url()
    _suppress_noisy_loggers()
    _set_experiment_id(config)
def pytest_report_header(config):
    """Add a Braintrust eval-results link to the pytest report header.

    Returns an empty string when no BRAINTRUST_API_KEY is configured, so no
    header line is shown in that case.
    """
    if not os.environ.get("BRAINTRUST_API_KEY"):
        return ""
    url = get_braintrust_url()
    # OSC 8 escape sequence renders a clickable hyperlink in supporting terminals.
    link = f"\033]8;;{url}\033\\{url}\033]8;;\033\\"
    return f"Eval results: (link valid once setup completes): {link}"
# Due to pytest quirks, we need to define this in the main conftest.py - when defined
# in the llm conftest.py it is SOMETIMES picked up and sometimes not, depending on how
# the test was invoked.
pytest_terminal_summary = show_llm_summary_report
@pytest.fixture(autouse=True)
def patch_supabase(monkeypatch):
    """Point the Supabase DAL module constants at fake, test-only credentials."""
    overrides = {
        "ROBUSTA_ACCOUNT_ID": "test-cluster",
        "STORE_URL": "https://fakesupabaseref.supabase.co",
        "STORE_API_KEY": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJyb2xlIjoiYW5vbiIsImlhdCI6MTYzNTAwODQ4NywiZXhwIjoxOTUwNTg0NDg3fQ.l8IgkO7TQokGSc9OJoobXIVXsOXkilXl4Ak6SCX5qI8",
        "STORE_EMAIL": "mock_store_user",
        "STORE_PASSWORD": "mock_store_password",
    }
    for attr, value in overrides.items():
        monkeypatch.setattr(f"holmes.core.supabase_dal.{attr}", value)
@pytest.fixture(autouse=True, scope="session")
def storage_dal_mock():
    """Replace SupabaseDal with a session-wide MagicMock so tests never hit real storage."""
    with patch("holmes.config.SupabaseDal") as dal_cls:
        dal = MagicMock()
        dal_cls.return_value = dal
        # Canned auth responses so sign-in/credential flows succeed in tests.
        dal.sign_in.return_value = "mock_supabase_user_id"
        dal.get_ai_credentials.return_value = ("mock_account_id", "mock_session_token")
        yield dal
@pytest.fixture(autouse=True)
def responses():
    """Intercept all HTTP requests, passing through only known external services.

    Anything not on the passthru list below is mocked by `responses`, so tests
    fail fast instead of making unexpected network calls.
    """
    # Ordered list of passthru targets: plain URL prefixes and compiled
    # regexes (the latter cover multi-region hosted services).
    passthru_targets = [
        "https://www.braintrust.dev",
        "https://api.braintrust.dev",  # Braintrust API calls
        "https://api.newrelic.com/graphql",  # New Relic API calls
        "https://api.eu.newrelic.com/graphql",  # New Relic API calls (EU)
        "http://localhost",
        # Datadog API calls (all regions and endpoints)
        "https://api.datadoghq.com",
        "https://api.datadoghq.eu",
        "https://api.ddog-gov.com",
        "https://api.us3.datadoghq.com",
        "https://api.us5.datadoghq.com",
        "https://api.ap1.datadoghq.com",
        "https://app.datadoghq.com",
        "https://app.datadoghq.eu",
        # Coralogix API calls (query and ingestion endpoints, all regions)
        re.compile(r"https://.*\.coralogix\.com"),
        re.compile(r"https://.*\.coralogix\.us"),
        re.compile(r"https://.*\.coralogix\.in"),
        # Elasticsearch/OpenSearch Cloud API calls (various hosting regions)
        re.compile(r"https://.*\.cloud\.es\.io"),  # Elastic Cloud
        re.compile(r"https://.*\.elastic-cloud\.com"),  # Azure-hosted
        re.compile(r"https://.*\.es\.amazonaws\.com"),  # AWS OpenSearch
        # Confluence/Atlassian Cloud API calls
        re.compile(r"https://.*\.atlassian\.net"),
        "https://api.atlassian.com",  # Atlassian Cloud API gateway
        # Misc allowed hosts
        "https://google.com",
        "https://burgergooglenetworkspam.co.uk",
        "https://www.google.com",
        "https://www.burgergooglenetworkspam.co.uk",
    ]
    with responses_.RequestsMock() as rsps:
        for target in passthru_targets:
            rsps.add_passthru(target)
        yield rsps