diff --git a/Sources/Parser/Preprocessor/GMarkPreprocessor.swift b/Sources/Parser/Preprocessor/GMarkPreprocessor.swift index e9a1592..0070f68 100644 --- a/Sources/Parser/Preprocessor/GMarkPreprocessor.swift +++ b/Sources/Parser/Preprocessor/GMarkPreprocessor.swift @@ -59,8 +59,25 @@ public class LaTeXPreprocessor: GMarkPreprocessorProtocol { private func processLaTeX(_ markdown: String) -> String { var result = markdown - // LaTeX pattern: $$...$$, $...$, \[...\], \(...\) - let pattern = "\\$\\$([\\s\\S]*?)\\$\\$|\\$([\\s\\S]*?)\\$|\\\\\\[([\\s\\S]*?)\\\\\\]|\\\\\\(([\\s\\S]*?)\\\\\\)" + // Process in two passes: + // First pass: Match display math ($$...$$) and bracket notation + // Second pass: Match inline math ($...$) but not where it conflicts with $$ + + // Pass 1: Display math with $$, \[...\], \(...\) + let displayPattern = "\\$\\$([\\s\\S]*?)\\$\\$|\\\\\\[([\\s\\S]*?)\\\\\\]|\\\\\\(([\\s\\S]*?)\\\\\\)" + result = processPattern(displayPattern, in: result, alwaysAccept: true) + + // Pass 2: Inline math with single $, but use markers to avoid conflicts + // We use negative lookbehind and lookahead to ensure $ is not preceded or followed by $ + // The pattern [^$\n]+? ensures content doesn't contain $ or newlines + let inlinePattern = "(? String { + var result = markdown guard let regex = try? NSRegularExpression(pattern: pattern, options: []) else { return result @@ -77,6 +94,14 @@ public class LaTeXPreprocessor: GMarkPreprocessorProtocol { // Skip if content is too large (potential security issue) guard matchedString.count < 3000 else { continue } + // Skip if already wrapped (from previous pass) + if matchedString.contains("") { + continue + } + + // For display math, always accept. For inline math, validate + guard alwaysAccept || isValidLaTeX(matchedString) else { continue } + let wrappedString = wrapLaTeX(matchedString) result = (result as NSString).replacingCharacters(in: matchRange, with: wrappedString) } @@ -84,6 +109,67 @@ public class LaTeXPreprocessor: GMarkPreprocessorProtocol { return result } + private func isValidLaTeX(_ content: String) -> Bool { + // For inline math ($...$), apply validation + // The content here should already have delimiters + let innerContent: Substring + if content.hasPrefix("$") && content.hasSuffix("$") { + innerContent = content.dropFirst().dropLast() + } else { + // For other formats, just use the content as-is + return true + } + + // If empty or just whitespace, not valid LaTeX + guard !innerContent.trimmingCharacters(in: .whitespaces).isEmpty else { + return false + } + + // If content contains table cell delimiters (|), it's very unlikely to be valid inline LaTeX + if innerContent.contains("|") { + return false + } + + // Currency pattern: Just digits possibly with decimal point, currency symbols, or simple separators + // Examples to reject: $20/月, $10, $5.99, $1,000 + // Note: \u4e00-\u9fff represents CJK (Chinese, Japanese, Korean) Unified Ideographs + let currencyPattern = "^\\s*\\d+([.,]\\d+)?\\s*[/\\-a-zA-Z\\u4e00-\\u9fff]*\\s*$" + if let currencyRegex = try? NSRegularExpression(pattern: currencyPattern, options: []), + currencyRegex.firstMatch(in: String(innerContent), options: [], range: NSRange(location: 0, length: innerContent.utf16.count)) != nil { + return false + } + + // Valid LaTeX should contain at least one of these indicators: + // - Backslash (LaTeX commands like \alpha, \frac, etc.) + // - Superscript/subscript (^, _) + // - Common math operators in context (+, -, *, =, <, > when with letters) + // - Greek letters or special symbols + // - Parentheses/brackets with operators + let hasBackslash = innerContent.contains("\\") + let hasSuperSubScript = innerContent.contains("^") || innerContent.contains("_") + let hasLetters = innerContent.rangeOfCharacter(from: CharacterSet.letters) != nil + + // If it has backslash (LaTeX command) or super/subscript, it's likely LaTeX + if hasBackslash || hasSuperSubScript { + return true + } + + // If it has letters and is not just a simple number/currency, it might be LaTeX + // This catches expressions like "x + y", "a = b", etc. + if hasLetters { + // Check if it contains math-like patterns + // Note: - is placed at the end of character class to avoid escaping issues + let mathPattern = "[a-zA-Z]\\s*[+*/=<>-]|[+*/=<>-]\\s*[a-zA-Z]|[a-zA-Z]\\s*\\^|\\^\\s*[a-zA-Z]" + if let mathRegex = try? NSRegularExpression(pattern: mathPattern, options: []), + mathRegex.firstMatch(in: String(innerContent), options: [], range: NSRange(location: 0, length: innerContent.utf16.count)) != nil { + return true + } + } + + // Default to false for safety - don't treat as LaTeX unless we're confident + return false + } + private func wrapLaTeX(_ content: String) -> String { let lines = content.components(separatedBy: .newlines) diff --git a/Tests/GMarkdownTests/GMarkdownTests.swift b/Tests/GMarkdownTests/GMarkdownTests.swift index 10db1fa..8f0f1bd 100644 --- a/Tests/GMarkdownTests/GMarkdownTests.swift +++ b/Tests/GMarkdownTests/GMarkdownTests.swift @@ -9,4 +9,57 @@ final class GMarkdownTests: XCTestCase { // Defining Test Cases and Test Methods // https://developer.apple.com/documentation/xctest/defining_test_cases_and_test_methods } + + // MARK: - LaTeX Preprocessor Tests + + func testLaTeXPreprocessorDoesNotMatchCurrencySymbols() throws { + let preprocessor = LaTeXPreprocessor() + + // Test case 1: $20/月 should not be treated as LaTeX + let input1 = "$20/月" + let result1 = preprocessor.process(input1) + XCTAssertEqual(result1, input1, "Currency symbols like $20/月 should not be treated as LaTeX") + + // Test case 2: 最低$10/月 should not be treated as LaTeX + let input2 = "最低$10/月" + let result2 = preprocessor.process(input2) + XCTAssertEqual(result2, input2, "Currency symbols in text like 最低$10/月 should not be treated as LaTeX") + + // Test case 3: Multiple currency symbols in a sentence + let input3 = "价格从$10到$20不等" + let result3 = preprocessor.process(input3) + XCTAssertEqual(result3, input3, "Multiple currency symbols should not be treated as LaTeX") + } + + func testLaTeXPreprocessorMatchesActualLaTeX() throws { + let preprocessor = LaTeXPreprocessor() + + // Test case 1: Inline LaTeX with actual math expression + let input1 = "The formula is $x^2 + y^2 = z^2$ in the text" + let result1 = preprocessor.process(input1) + XCTAssertTrue(result1.contains(""), "Actual LaTeX expressions should be wrapped") + XCTAssertTrue(result1.contains("x^2 + y^2 = z^2"), "LaTeX content should be preserved") + + // Test case 2: Display LaTeX with $$ + let input2 = "Display formula: $$\\int_0^1 x^2 dx$$" + let result2 = preprocessor.process(input2) + XCTAssertTrue(result2.contains(""), "Display LaTeX with $$ should be wrapped") + + // Test case 3: LaTeX with brackets + let input3 = "Formula: \\[x + y = z\\]" + let result3 = preprocessor.process(input3) + XCTAssertTrue(result3.contains(""), "LaTeX with \\[...\\] should be wrapped") + } + + func testLaTeXPreprocessorMixedContent() throws { + let preprocessor = LaTeXPreprocessor() + + // Test mixed content from the issue + let input = "| **收费模式** | 免费基础版 / Plus订阅 ($20/月) | **完全免费** (Web端) / API超低价 | **完全免费** | 必须付费订阅 (最低$10/月) |" + let result = preprocessor.process(input) + + // Currency symbols should remain unchanged + XCTAssertTrue(result.contains("$20/月"), "Currency $20/月 should not be modified") + XCTAssertTrue(result.contains("$10/月"), "Currency $10/月 should not be modified") + } }