Optimize code-variables pattern matching with a plain-substring fast path.

Simple "(?i)literal" patterns are matched with a lowercase "in" check instead
of re.search(); everything else is still compiled as a regular expression.

Review fixes folded into this patch:
  * the fast path is skipped for flag groups containing x, a or L, where a
    substring check would not behave like the regex;
  * non-string patterns (e.g. precompiled regexes) fall back to re.compile()
    instead of raising TypeError;
  * docstrings and a regression test were added, so the post-image blob ids
    on the "index" lines of the two edited Python files are not accurate.

diff --git a/CHANGELOG.md b/CHANGELOG.md
index eced7a56..2ba3d34b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,4 +1,8 @@
-# 7.8.3 - 2026-02-09
+# 7.8.5 - 2026-02-09
+
+fix: further optimize code variables pattern matching
+
+# 7.8.4 - 2026-02-09
 
 fix: do not pattern match long values in code variables
 
diff --git a/posthog/exception_utils.py b/posthog/exception_utils.py
index dfc92ccf..4cec07e4 100644
--- a/posthog/exception_utils.py
+++ b/posthog/exception_utils.py
@@ -66,6 +66,7 @@
 CODE_VARIABLES_TOO_LONG_VALUE = "$$_posthog_value_too_long_$$"
 
 _MAX_VALUE_LENGTH_FOR_PATTERN_MATCH = 5_000
+_REGEX_METACHARACTERS = frozenset(r"\.^$*+?{}[]|()")
 
 DEFAULT_TOTAL_VARIABLES_SIZE_LIMIT = 20 * 1024
 
@@ -931,18 +932,65 @@ def strip_string(value, max_length=None):
     )
 
 
+def _extract_plain_substring(pattern):
+    """Return the lowercase literal of a plain ``(?i)text`` pattern, else None.
+
+    A non-None result lets callers replace ``re.search`` with a cheap
+    case-insensitive ``in`` check; None means "compile it as a real regex".
+    """
+    # Non-string patterns (e.g. precompiled regexes) cannot take the fast path;
+    # returning None hands them to re.compile(), which the caller guards.
+    if not isinstance(pattern, str):
+        return None
+    # Leading inline flag group that includes 'i', e.g. (?i), (?im), (?si).
+    # Only i/m/s/u qualify: 'x' (VERBOSE) makes whitespace and '#' in the
+    # remainder insignificant, 'a' (ASCII) narrows case folding, and 'L'
+    # (LOCALE) is rejected by re.compile() for str patterns, so a substring
+    # check would not behave like the regex for any of those.
+    inline_flags = re.match(r"\(\?[imsu]*i[imsu]*\)", pattern)
+    if not inline_flags:
+        return None
+    remainder = pattern[inline_flags.end() :]
+    if not remainder or any(c in _REGEX_METACHARACTERS for c in remainder):
+        return None
+    return remainder.lower()
+
+
 def _compile_patterns(patterns):
-    compiled = []
+    """Split patterns into (substrings, regexes) for _pattern_matches.
+
+    Returns None when there is nothing usable to match against.
+    """
+    if not patterns:
+        return None
+    substrings = []
+    regexes = []
     for pattern in patterns:
-        try:
-            compiled.append(re.compile(pattern))
-        except Exception:
-            pass
-    return compiled
+        simple = _extract_plain_substring(pattern)
+        if simple is not None:
+            substrings.append(simple)
+        else:
+            try:
+                regexes.append(re.compile(pattern))
+            except Exception:
+                # Best effort: skip patterns that are invalid or not compilable.
+                pass
+    if not substrings and not regexes:
+        return None
+    return (substrings, regexes)
 
 
 def _pattern_matches(name, patterns):
-    for pattern in patterns:
+    """Return True if name matches any pattern produced by _compile_patterns."""
+    if patterns is None:
+        return False
+    substrings, regexes = patterns
+    if substrings:
+        name_lower = name.lower()
+        for s in substrings:
+            if s in name_lower:
+                return True
+    for pattern in regexes:
         if pattern.search(name):
             return True
     return False
diff --git a/posthog/test/test_exception_capture.py b/posthog/test/test_exception_capture.py
index 24d84935..a90a000e 100644
--- a/posthog/test/test_exception_capture.py
+++ b/posthog/test/test_exception_capture.py
@@ -590,3 +590,75 @@ def test_mask_sensitive_data_circular_ref():
     result = _mask_sensitive_data(circular_list, compiled_mask)
     assert result[0] == "item"
     assert result[1] == ""
+
+
+def test_compile_patterns_fast_path_and_regex_fallback():
+    from posthog.exception_utils import _compile_patterns, _pattern_matches
+
+    # Simple case-insensitive patterns should become substrings
+    simple_only = _compile_patterns([r"(?i)password", r"(?i)token", r"(?i)jwt"])
+    substrings, regexes = simple_only
+    assert substrings == ["password", "token", "jwt"]
+    assert regexes == []
+
+    assert _pattern_matches("my_password_var", simple_only) is True
+    assert _pattern_matches("MY_TOKEN", simple_only) is True
+    assert _pattern_matches("safe_variable", simple_only) is False
+
+    # Complex regex patterns should stay as compiled regexes
+    complex_only = _compile_patterns([r"^__.*", r"\d{3,}", r"^sk_live_"])
+    substrings, regexes = complex_only
+    assert substrings == []
+    assert len(regexes) == 3
+
+    assert _pattern_matches("__dunder", complex_only) is True
+    assert _pattern_matches("has_999_numbers", complex_only) is True
+    assert _pattern_matches("sk_live_abc123", complex_only) is True
+    assert _pattern_matches("normal_var", complex_only) is False
+
+    # Mixed: simple substrings + complex regexes together
+    mixed = _compile_patterns(
+        [
+            r"(?i)secret",  # simple
+            r"(?i)api_key",  # simple
+            r"^__.*",  # regex
+            r"\btoken_\w+",  # regex
+        ]
+    )
+    substrings, regexes = mixed
+    assert substrings == ["secret", "api_key"]
+    assert len(regexes) == 2
+
+    # Substring matches
+    assert _pattern_matches("my_secret", mixed) is True
+    assert _pattern_matches("API_KEY_VALUE", mixed) is True
+
+    # Regex matches
+    assert _pattern_matches("__private", mixed) is True
+    assert _pattern_matches("token_abc", mixed) is True
+
+    # No match
+    assert _pattern_matches("safe_var", mixed) is False
+
+
+def test_compile_patterns_fast_path_preserves_regex_semantics():
+    import re
+
+    from posthog.exception_utils import _compile_patterns, _pattern_matches
+
+    # Verbose mode ignores pattern whitespace, so this must stay a real regex
+    verbose = _compile_patterns([r"(?ix)pass word"])
+    substrings, regexes = verbose
+    assert substrings == []
+    assert len(regexes) == 1
+    assert _pattern_matches("MY_PASSWORD", verbose) is True
+
+    # Flag groups that re.compile() rejects for str patterns are still skipped
+    assert _compile_patterns([r"(?iL)secret"]) is None
+
+    # Precompiled patterns are passed through instead of raising
+    precompiled = _compile_patterns([re.compile(r"(?i)token")])
+    substrings, regexes = precompiled
+    assert substrings == []
+    assert len(regexes) == 1
+    assert _pattern_matches("API_TOKEN", precompiled) is True
diff --git a/posthog/version.py b/posthog/version.py
index 9b7cad35..9581625a 100644
--- a/posthog/version.py
+++ b/posthog/version.py
@@ -1,4 +1,4 @@
-VERSION = "7.8.4"
+VERSION = "7.8.5"
 
 if __name__ == "__main__":
     print(VERSION, end="")  # noqa: T201