Skip to content

Commit 4147163

Browse files
authored
Group by rule (#490)
* group output by rule
* Group rich audit output by rule/type and expand coverage
  - Render rich warnings as rule groups with per-issue-type subgroups
  - Add auditor tests for subgroup ordering, mixed issue types, and verbose snippet behavior
  - Add CLI rich end-to-end grouping checks plus calculus golden snapshot
  - Improve docstrings and compact test argument setup
* Improve --only CLI coverage and fix rich Path filename crash
* Test that audit-ignore suppresses untranslated and diff findings in compare_files
1 parent 6126970 commit 4147163

7 files changed

Lines changed: 612 additions & 95 deletions

File tree

PythonScripts/audit_translations/README.md

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,6 @@ The tool automatically adjusts its matching logic based on the file type:
5656

5757
**Syntax:**
5858
```bash
59-
# Preferred: console script (no -m needed)
6059
uv run audit-translations <language> [--file <specific_file>]
6160
uv run audit-translations --list
6261
@@ -111,4 +110,4 @@ uv run --project PythonScripts audit-translations --list
111110

112111
### Testing
113112

114-
```uv run python -m pytest```
113+
```uv run pytest```

PythonScripts/audit_translations/auditor.py

Lines changed: 145 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
import os
1010
import sys
1111
from pathlib import Path
12-
from typing import Iterable, List, Optional, TextIO, Tuple
12+
from typing import Any, Dict, Iterable, List, Optional, TextIO, Tuple
1313

1414
from rich.console import Console
1515
from rich.markup import escape
@@ -154,24 +154,46 @@ def rule_label(rule: RuleInfo) -> str:
154154
if rule.name is None:
155155
return f"[yellow]\"{escape(rule.key)}\"[/]"
156156
tag = rule.tag or "unknown"
157-
return f"[cyan]{escape(rule.name)}[/] [dim][{escape(tag)}][/]"
157+
return f"[cyan]{escape(rule.name)}[/] [dim]({escape(tag)})[/]"
158158

159159

160-
def print_rule_item(rule: RuleInfo, issue_line: int, context: str = ""):
161-
console.print(f" [dim]•[/] {rule_label(rule)} [dim](line {issue_line}{context})[/]")
160+
def issue_type_sort_key(issue_type: str) -> Tuple[int, str]:
161+
"""
162+
Stable ordering for per-rule issue groups.
162163
164+
The first tuple element defines user-facing priority (missing/untranslated/
165+
match/condition/variables/structure/extra). The second element keeps sorting
166+
deterministic for unknown keys.
167+
"""
168+
order = {
169+
"missing_rule": 0,
170+
"untranslated_text": 1,
171+
"rule_difference:match": 2,
172+
"rule_difference:condition": 3,
173+
"rule_difference:variables": 4,
174+
"rule_difference:structure": 5,
175+
"extra_rule": 6,
176+
}
177+
return order.get(issue_type, 99), issue_type
163178

164-
def print_diff_item(diff: RuleDifference, line_en: int, line_tr: int, verbose: bool = False):
165-
"""Print a single rule difference"""
166-
rule = diff.english_rule
167-
console.print(
168-
f" [dim]•[/] {rule_label(rule)} "
169-
f"[dim](line {line_en} en, {line_tr} tr)[/]"
170-
)
171-
console.print(f" [dim]{diff.description}[/]")
172-
if verbose:
173-
console.print(f" [green]en:[/] {escape(diff.english_snippet)}")
174-
console.print(f" [red]tr:[/] {escape(diff.translated_snippet)}")
179+
180+
def issue_type_label(issue_type: str) -> str:
181+
"""
182+
Return the display label used in rich grouped output.
183+
184+
Unknown issue types fall back to their raw key so renderer behavior remains
185+
robust when new categories are introduced.
186+
"""
187+
labels = {
188+
"missing_rule": "Missing in Translation",
189+
"untranslated_text": "Untranslated Text",
190+
"rule_difference:match": "Match Pattern Differences",
191+
"rule_difference:condition": "Condition Differences",
192+
"rule_difference:variables": "Variable Differences",
193+
"rule_difference:structure": "Structure Differences",
194+
"extra_rule": "Extra in Translation",
195+
}
196+
return labels.get(issue_type, issue_type)
175197

176198

177199
def issue_base(rule: RuleInfo, file_name: str, language: str) -> dict:
@@ -373,6 +395,7 @@ def write(self, issue: dict) -> None:
373395
def print_warnings(result: ComparisonResult, file_name: str, verbose: bool = False) -> int:
374396
"""Print warnings to console. Returns count of issues found."""
375397
issues = 0
398+
display_name = Path(file_name).as_posix()
376399

377400
has_issues = result.missing_rules or result.untranslated_text or result.extra_rules or result.rule_differences
378401
if not has_issues:
@@ -382,64 +405,117 @@ def print_warnings(result: ComparisonResult, file_name: str, verbose: bool = Fal
382405
("red", "✗") if result.translated_rule_count == 0 else ("yellow", "⚠")
383406
console.print()
384407
console.rule(style="cyan")
385-
console.print(f"[{style}]{icon}[/] [bold]{escape(file_name)}[/]")
408+
console.print(f"[{style}]{icon}[/] [bold]{escape(display_name)}[/]")
386409
console.print(f" [dim]English: {result.english_rule_count} rules → Translated: {result.translated_rule_count} rules[/]")
387410
console.rule(style="cyan")
388411

389-
if result.missing_rules:
390-
console.print(f"\n [red]✗[/] [bold]Missing Rules[/] [[red]{len(result.missing_rules)}[/]] [dim](in English but not in translation)[/]")
391-
for rule in result.missing_rules:
392-
print_rule_item(rule, issue_line=rule.line_number, context=" in English")
393-
issues += 1
394-
395-
if result.untranslated_text:
396-
untranslated_count = sum(len(entries) for _, entries in result.untranslated_text)
397-
console.print(f"\n [yellow]⚠[/] [bold]Untranslated Text[/] [[yellow]{untranslated_count}[/]] [dim](lowercase t/ot/ct keys)[/]")
398-
for rule, entries in result.untranslated_text:
399-
for _, text, line in entries:
400-
issue_line = line or rule.line_number
401-
print_rule_item(rule, issue_line=issue_line)
402-
console.print(f" [dim]→[/] [yellow]\"{escape(text)}\"[/]")
403-
issues += 1
404-
405-
if result.rule_differences:
406-
# Count only diffs that will actually be displayed
407-
displayable_diffs = []
408-
for diff in result.rule_differences:
409-
if diff.diff_type == "structure":
410-
en_tokens = extract_structure_elements(diff.english_rule.data)
411-
tr_tokens = extract_structure_elements(diff.translated_rule.data)
412-
en_token, tr_token, mismatch_pos = first_structure_mismatch(en_tokens, tr_tokens)
413-
414-
# Skip reporting when tokens are misaligned (both exist but differ)
415-
# This avoids misleading line numbers when entire blocks are missing/added
416-
if en_token is not None and tr_token is not None and en_token != tr_token:
417-
continue
418-
419-
line_en = resolve_issue_line(diff.english_rule, "structure", en_token)
420-
line_tr = resolve_issue_line(diff.translated_rule, "structure", tr_token)
421-
# Skip structure diffs where we can't find both tokens
422-
if line_en is None or line_tr is None:
423-
continue
424-
else:
425-
line_en = resolve_issue_line(diff.english_rule, diff.diff_type)
426-
line_tr = resolve_issue_line(diff.translated_rule, diff.diff_type)
427-
displayable_diffs.append((diff, line_en, line_tr))
428-
429-
if displayable_diffs:
430-
console.print(
431-
f"\n [magenta]≠[/] [bold]Rule Differences[/] "
432-
f"[[magenta]{len(displayable_diffs)}[/]] [dim](structural differences between en and translation)[/]"
412+
grouped_issues: Dict[str, Dict[str, Any]] = {}
413+
414+
def add_issue(rule: RuleInfo, issue_type: str, payload: Dict[str, Any]) -> None:
415+
if rule.key not in grouped_issues:
416+
grouped_issues[rule.key] = {
417+
"rule": rule,
418+
"by_type": {},
419+
}
420+
type_map: Dict[str, List[Dict[str, Any]]] = grouped_issues[rule.key]["by_type"]
421+
type_map.setdefault(issue_type, []).append(payload)
422+
423+
for rule in result.missing_rules:
424+
add_issue(
425+
rule,
426+
"missing_rule",
427+
{"line_en": rule.line_number},
428+
)
429+
430+
for rule, entries in result.untranslated_text:
431+
for _, text, line in entries:
432+
issue_line = line or rule.line_number
433+
add_issue(
434+
rule,
435+
"untranslated_text",
436+
{"line_tr": issue_line, "text": text},
433437
)
434-
for diff, line_en, line_tr in displayable_diffs:
435-
print_diff_item(diff, line_en=line_en, line_tr=line_tr, verbose=verbose)
436-
issues += 1
437-
438-
if result.extra_rules:
439-
console.print(f"\n [blue]ℹ[/] [bold]Extra Rules[/] [[blue]{len(result.extra_rules)}[/]] [dim](may be intentional)[/]")
440-
for rule in result.extra_rules:
441-
print_rule_item(rule, issue_line=rule.line_number)
442-
issues += 1
438+
439+
for diff in result.rule_differences:
440+
if diff.diff_type == "structure":
441+
en_tokens = extract_structure_elements(diff.english_rule.data)
442+
tr_tokens = extract_structure_elements(diff.translated_rule.data)
443+
en_token, tr_token, mismatch_pos = first_structure_mismatch(en_tokens, tr_tokens)
444+
445+
# Skip reporting when tokens are misaligned (both exist but differ)
446+
# This avoids misleading line numbers when entire blocks are missing/added
447+
if en_token is not None and tr_token is not None and en_token != tr_token:
448+
continue
449+
450+
line_en = resolve_issue_line(diff.english_rule, "structure", en_token)
451+
line_tr = resolve_issue_line(diff.translated_rule, "structure", tr_token)
452+
# Skip structure diffs where we can't find both tokens
453+
if line_en is None or line_tr is None:
454+
continue
455+
else:
456+
line_en = resolve_issue_line(diff.english_rule, diff.diff_type)
457+
line_tr = resolve_issue_line(diff.translated_rule, diff.diff_type)
458+
459+
add_issue(
460+
diff.english_rule,
461+
f"rule_difference:{diff.diff_type}",
462+
{"line_en": line_en, "line_tr": line_tr, "diff": diff},
463+
)
464+
465+
for rule in result.extra_rules:
466+
add_issue(
467+
rule,
468+
"extra_rule",
469+
{"line_tr": rule.line_number},
470+
)
471+
472+
if grouped_issues:
473+
total_grouped_issues = sum(
474+
len(entries)
475+
for group in grouped_issues.values()
476+
for entries in group["by_type"].values()
477+
)
478+
console.print(
479+
f"\n [magenta]≠[/] [bold]Rule Issues[/] "
480+
f"[[magenta]{total_grouped_issues}[/]] [dim](grouped by rule and issue type)[/]"
481+
)
482+
for group in grouped_issues.values():
483+
rule = group["rule"]
484+
by_type: Dict[str, List[Dict[str, Any]]] = group["by_type"]
485+
console.print(f" [dim]•[/] {rule_label(rule)}")
486+
for issue_type in sorted(by_type.keys(), key=issue_type_sort_key):
487+
entries = by_type[issue_type]
488+
console.print(
489+
f" [dim]{issue_type_label(issue_type)} "
490+
f"[{len(entries)}][/]"
491+
)
492+
for entry in entries:
493+
if issue_type == "missing_rule":
494+
console.print(
495+
f" [dim]•[/] [dim](line {entry['line_en']} in English)[/]"
496+
)
497+
issues += 1
498+
elif issue_type == "extra_rule":
499+
console.print(
500+
f" [dim]•[/] [dim](line {entry['line_tr']} in translation)[/]"
501+
)
502+
issues += 1
503+
elif issue_type == "untranslated_text":
504+
console.print(
505+
f" [dim]•[/] [dim](line {entry['line_tr']} tr)[/] "
506+
f"[yellow]\"{escape(entry['text'])}\"[/]"
507+
)
508+
issues += 1
509+
else:
510+
diff: RuleDifference = entry["diff"]
511+
console.print(
512+
f" [dim]•[/] [dim](line {entry['line_en']} en, {entry['line_tr']} tr)[/]"
513+
)
514+
console.print(f" [dim]{diff.description}[/]")
515+
if verbose:
516+
console.print(f" [green]en:[/] {escape(diff.english_snippet)}")
517+
console.print(f" [red]tr:[/] {escape(diff.translated_snippet)}")
518+
issues += 1
443519

444520
return issues
445521

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
╭──────────────────────────────────────────────────────────────────────────────╮
2+
│ MathCAT Translation Audit: ES │
3+
╰──────────────────────────────────────────────────────────────────────────────╯
4+
5+
Comparing against English (en) reference files
6+
Files to check: 1
7+
8+
────────────────────────────────────────────────────────────────────────────────
9+
⚠ SharedRules/calculus.yaml
10+
English: 4 rules → Translated: 3 rules
11+
────────────────────────────────────────────────────────────────────────────────
12+
13+
≠ Rule Issues [13] (grouped by rule and issue type)
14+
• laplacian (laplacian)
15+
Missing in Translation [1]
16+
• (line 4 in English)
17+
• divergence (divergence)
18+
Untranslated Text [3]
19+
• (line 10 tr) "divergence"
20+
• (line 11 tr) "div"
21+
• (line 12 tr) "of"
22+
Match Pattern Differences [1]
23+
• (line 22 en, 6 tr)
24+
Match pattern differs
25+
en: count(*) = 1
26+
tr: .
27+
Condition Differences [1]
28+
• (line 25 en, 9 tr)
29+
Conditions differ
30+
en: $Verbosity='Terse', not(IsNode(*[1], 'leaf'))
31+
tr: $Verbosity='Verbose', not(IsNode(*[1], 'leaf'))
32+
• curl (curl)
33+
Untranslated Text [1]
34+
• (line 22 tr) "curl of"
35+
Match Pattern Differences [1]
36+
• (line 35 en, 20 tr)
37+
Match pattern differs
38+
en: count(*) = 1
39+
tr: .
40+
Condition Differences [1]
41+
• (line 39 en, 24 tr)
42+
Conditions differ
43+
en: $Verbosity!='Terse', not(IsNode(*[1], 'leaf'))
44+
tr: not(IsNode(*[1], 'leaf'))
45+
Structure Differences [1]
46+
• (line 38 en, 18 tr)
47+
Rule structure differs (test/if/then/else blocks)
48+
en: replace: test: if: then: test: if: then:
49+
tr: replace: test: if: then:
50+
• gradient (gradient)
51+
Untranslated Text [2]
52+
• (line 34 tr) "gradient of"
53+
• (line 35 tr) "del"
54+
Match Pattern Differences [1]
55+
• (line 48 en, 30 tr)
56+
Match pattern differs
57+
en: count(*) = 1
58+
tr: .
59+
╭──────────────────────────────────────────────────────────────────────────────╮
60+
│ SUMMARY │
61+
│ Files checked 1 │
62+
│ Files with issues 1 │
63+
│ Files OK 0 │
64+
│ Missing rules 1 │
65+
│ Untranslated text 6 │
66+
│ Rule differences 6 │
67+
│ Extra rules 0 │
68+
╰──────────────────────────────────────────────────────────────────────────────╯

PythonScripts/audit_translations/tests/golden/rich/structure_diff_nonverbose.golden

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
English: 1 rules → Translated: 1 rules
55
────────────────────────────────────────────────────────────────────────────────
66

7-
≠ Rule Differences [1] (structural differences between en and translation)
8-
• struct-rule (line 9 en, 1 tr)
9-
Rule structure differs (test/if/then/else blocks)
7+
≠ Rule Issues [1] (grouped by rule and issue type)
8+
• struct-rule (mi)
9+
Structure Differences [1]
10+
• (line 9 en, 1 tr)
11+
Rule structure differs (test/if/then/else blocks)

PythonScripts/audit_translations/tests/golden/rich/structure_diff_verbose.golden

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,10 @@
44
English: 1 rules → Translated: 1 rules
55
────────────────────────────────────────────────────────────────────────────────
66

7-
≠ Rule Differences [1] (structural differences between en and translation)
8-
• struct-rule (line 9 en, 1 tr)
9-
Rule structure differs (test/if/then/else blocks)
10-
en: replace: test: if: then: else:
11-
tr: replace: test: if: then:
7+
≠ Rule Issues [1] (grouped by rule and issue type)
8+
• struct-rule (mi)
9+
Structure Differences [1]
10+
• (line 9 en, 1 tr)
11+
Rule structure differs (test/if/then/else blocks)
12+
en: replace: test: if: then: else:
13+
tr: replace: test: if: then:

0 commit comments

Comments
 (0)