Skip to content

Commit ad42e92

Browse files
authored
Merge pull request #100 from longieirl/fix/90-g004-logging-fstrings
fix(#90): replace logging f-strings with % formatting (G004, 214 sites)
2 parents: c802c52 + 31524ae; commit ad42e92

37 files changed

Lines changed: 478 additions & 303 deletions

.github/workflows/ci.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -194,7 +194,7 @@ jobs:
194194
195195
- name: Install security tools + packages
196196
run: |
197-
pip install --upgrade pip bandit[toml] pip-audit
197+
pip install --upgrade pip bandit[toml] pip-audit xenon
198198
pip install -e packages/parser-core
199199
pip install -e packages/parser-free
200200

packages/parser-core/pyproject.toml

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -167,7 +167,6 @@ select = [
167167
ignore = [
168168
"E501", # line too long — handled by black
169169
"PLR2004", # magic value comparison — acceptable in tests and config
170-
"G004", # logging f-string — 214 violations, deferred, see GitHub issue #90
171170
]
172171

173172
[tool.ruff.lint.per-file-ignores]

packages/parser-core/src/bankstatements_core/analysis/column_analyzer.py

Lines changed: 34 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -44,7 +44,7 @@ def analyze_columns(
4444
Returns:
4545
Dictionary mapping column names to (x_min, x_max) tuples
4646
"""
47-
logger.debug(f"Analyzing columns in table {table_bbox}")
47+
logger.debug("Analyzing columns in table %s", table_bbox)
4848

4949
# Extract words within table bbox
5050
words = page.extract_words(
@@ -65,14 +65,14 @@ def analyze_columns(
6565
logger.warning("No words found in table region")
6666
return {}
6767

68-
logger.debug(f"Found {len(table_words)} words in table region")
68+
logger.debug("Found %s words in table region", len(table_words))
6969

7070
# Find header row first
7171
header_words = self._find_header_words(table_words, table_bbox)
7272

7373
if header_words:
7474
# Strategy: Use header words to define columns
75-
logger.debug(f"Using {len(header_words)} header words to define columns")
75+
logger.debug("Using %s header words to define columns", len(header_words))
7676
boundaries, column_names = self._create_columns_from_headers(
7777
header_words, table_bbox
7878
)
@@ -83,16 +83,16 @@ def analyze_columns(
8383
boundaries = self._detect_boundaries_from_clusters(clusters)
8484
column_names = [f"Column{i+1}" for i in range(len(boundaries))]
8585

86-
logger.debug(f"Detected {len(boundaries)} column boundaries")
86+
logger.debug("Detected %s column boundaries", len(boundaries))
8787

8888
# Build result dictionary
8989
columns = {}
9090
for i, (x_min, x_max) in enumerate(boundaries):
9191
column_name = column_names[i] if i < len(column_names) else f"Column{i+1}"
9292
columns[column_name] = (x_min, x_max)
93-
logger.debug(f" {column_name}: ({x_min:.1f}, {x_max:.1f})")
93+
logger.debug(" %s: (%.1f, %.1f)", column_name, x_min, x_max)
9494

95-
logger.info(f"Detected {len(columns)} columns")
95+
logger.info("Detected %s columns", len(columns))
9696
return columns
9797

9898
def _cluster_x_coordinates(self, words: list[dict]) -> list[float]:
@@ -130,7 +130,7 @@ def _cluster_x_coordinates(self, words: list[dict]) -> list[float]:
130130
clusters.append(cluster_center)
131131

132132
logger.debug(
133-
f"Clustered {len(x_coords)} X-coords into {len(clusters)} clusters"
133+
"Clustered %s X-coords into %s clusters", len(x_coords), len(clusters)
134134
)
135135
return sorted(clusters)
136136

@@ -201,8 +201,10 @@ def _find_header_words(
201201
header_words = [word for word in table_words if word["top"] <= header_threshold]
202202

203203
logger.debug(
204-
f"Found {len(header_words)} words in header row "
205-
f"(Y={min_y:.1f}, threshold={header_threshold:.1f})"
204+
"Found %s words in header row (Y=%.1f, threshold=%.1f)",
205+
len(header_words),
206+
min_y,
207+
header_threshold,
206208
)
207209
return header_words
208210

@@ -272,8 +274,11 @@ def _assign_column_names(
272274
column_names[best_col_idx] = name
273275

274276
logger.debug(
275-
f"Column {best_col_idx} [{boundaries[best_col_idx][0]:.1f}, "
276-
f"{boundaries[best_col_idx][1]:.1f}]: '{name}'"
277+
"Column %s [%.1f, %.1f]: '%s'",
278+
best_col_idx,
279+
boundaries[best_col_idx][0],
280+
boundaries[best_col_idx][1],
281+
name,
277282
)
278283

279284
# Fill in any unassigned columns with generic names
@@ -284,8 +289,11 @@ def _assign_column_names(
284289
name = f"Column{i+1}"
285290
result_names.append(name)
286291
logger.debug(
287-
f"Column {i} [{boundaries[i][0]:.1f}, {boundaries[i][1]:.1f}]: "
288-
f"'{name}' (no match)"
292+
"Column %s [%.1f, %.1f]: '%s' (no match)",
293+
i,
294+
boundaries[i][0],
295+
boundaries[i][1],
296+
name,
289297
)
290298
else:
291299
result_names.append(name_val)
@@ -323,11 +331,16 @@ def _resolve_overlapping_boundaries(
323331
# Leave 1px gap to avoid extraction ambiguity
324332
new_x_max = next_x_min - 1
325333
logger.debug(
326-
f"Overlap detected: Column {i} [{x_min:.1f}, {x_max:.1f}] "
327-
f"overlaps Column {i+1} [{next_x_min:.1f}, {next_x_max:.1f}]"
334+
"Overlap detected: Column %s [%.1f, %.1f] overlaps Column %s [%.1f, %.1f]",
335+
i,
336+
x_min,
337+
x_max,
338+
i + 1,
339+
next_x_min,
340+
next_x_max,
328341
)
329342
logger.debug(
330-
f" Adjusting Column {i} x_max: {x_max:.1f} -> {new_x_max:.1f}"
343+
" Adjusting Column %s x_max: %.1f -> %.1f", i, x_max, new_x_max
331344
)
332345
x_max = new_x_max
333346

@@ -364,7 +377,9 @@ def _create_columns_from_headers(
364377
word_groups.append(current_group)
365378

366379
logger.debug(
367-
f"Grouped {len(header_words)} header words into {len(word_groups)} columns"
380+
"Grouped %s header words into %s columns",
381+
len(header_words),
382+
len(word_groups),
368383
)
369384

370385
# Create boundaries and names from word groups
@@ -390,7 +405,7 @@ def _create_columns_from_headers(
390405
name = " ".join(w["text"] for w in group)
391406
column_names.append(name)
392407

393-
logger.debug(f" Column: '{name}' at [{x_min:.1f}, {x_max:.1f}]")
408+
logger.debug(" Column: '%s' at [%.1f, %.1f]", name, x_min, x_max)
394409

395410
# Resolve overlaps by adjusting boundaries
396411
boundaries = self._resolve_overlapping_boundaries(boundaries)
@@ -399,7 +414,7 @@ def _create_columns_from_headers(
399414
for i, (x_min, x_max) in enumerate(boundaries):
400415
if i < len(column_names):
401416
logger.debug(
402-
f" Adjusted '{column_names[i]}': [{x_min:.1f}, {x_max:.1f}]"
417+
" Adjusted '%s': [%.1f, %.1f]", column_names[i], x_min, x_max
403418
)
404419

405420
return boundaries, column_names

packages/parser-core/src/bankstatements_core/analysis/iban_spatial_filter.py

Lines changed: 25 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -64,11 +64,11 @@ def extract_iban_candidates(self, page: Any) -> list[IBANCandidate]: # noqa: C9
6464
page_num = page.page_number
6565
if page_num != 1:
6666
logger.warning(
67-
f"⚠️ IBAN extraction called on page {page_num} - "
68-
"should only process first page!"
67+
"⚠️ IBAN extraction called on page %s - should only process first page!",
68+
page_num,
6969
)
7070

71-
logger.debug(f"Extracting IBAN candidates from page {page_num}")
71+
logger.debug("Extracting IBAN candidates from page %s", page_num)
7272

7373
# Extract all words with coordinates
7474
words = page.extract_words(x_tolerance=3, y_tolerance=3, keep_blank_chars=False)
@@ -77,7 +77,7 @@ def extract_iban_candidates(self, page: Any) -> list[IBANCandidate]: # noqa: C9
7777
logger.debug("No words found on page")
7878
return []
7979

80-
logger.debug(f"Extracted {len(words)} words from page")
80+
logger.debug("Extracted %s words from page", len(words))
8181

8282
# Group nearby words into potential IBAN sequences
8383
candidates = []
@@ -129,7 +129,7 @@ def extract_iban_candidates(self, page: Any) -> list[IBANCandidate]: # noqa: C9
129129
)
130130

131131
candidates.append(candidate)
132-
logger.debug(f"Found IBAN candidate: {masked} at {word_bbox}")
132+
logger.debug("Found IBAN candidate: %s at %s", masked, word_bbox)
133133

134134
# Fallback: If no candidates found with word-based approach,
135135
# try text-based extraction with approximate coordinates
@@ -143,7 +143,7 @@ def extract_iban_candidates(self, page: Any) -> list[IBANCandidate]: # noqa: C9
143143
iban = self.iban_extractor.extract_iban(page_text)
144144
if iban:
145145
masked = self.iban_extractor._mask_iban(iban)
146-
logger.info(f"✓ Found IBAN using text-based fallback: {masked}")
146+
logger.info("✓ Found IBAN using text-based fallback: %s", masked)
147147

148148
# Create approximate bounding box (page header area)
149149
# Most IBANs are in the top 1/3 of the page
@@ -161,10 +161,11 @@ def extract_iban_candidates(self, page: Any) -> list[IBANCandidate]: # noqa: C9
161161
)
162162
candidates.append(candidate)
163163
logger.debug(
164-
f"Using approximate bounding box for fallback IBAN: {approx_bbox}"
164+
"Using approximate bounding box for fallback IBAN: %s",
165+
approx_bbox,
165166
)
166167

167-
logger.info(f"Found {len(candidates)} IBAN candidates on page {page_num}")
168+
logger.info("Found %s IBAN candidates on page %s", len(candidates), page_num)
168169
return candidates
169170

170171
def filter_by_table_overlap(
@@ -198,19 +199,20 @@ def filter_by_table_overlap(
198199
overlaps_table = True
199200
candidate.rejection_reason = f"Overlaps with table {table_bbox}"
200201
logger.debug(
201-
f"REJECTED: {candidate.masked} overlaps table {table_bbox}"
202+
"REJECTED: %s overlaps table %s", candidate.masked, table_bbox
202203
)
203204
break
204205

205206
if overlaps_table:
206207
rejected.append(candidate)
207208
else:
208209
filtered.append(candidate)
209-
logger.debug(f"ACCEPTED: {candidate.masked} does not overlap tables")
210+
logger.debug("ACCEPTED: %s does not overlap tables", candidate.masked)
210211

211212
logger.info(
212-
f"Filtered IBANs: {len(filtered)} accepted, {len(rejected)} rejected "
213-
f"(table overlap)"
213+
"Filtered IBANs: %s accepted, %s rejected (table overlap)",
214+
len(filtered),
215+
len(rejected),
214216
)
215217

216218
return filtered
@@ -240,23 +242,26 @@ def score_candidates(
240242
# Score 1: Header area preference (+50 points)
241243
if candidate.bbox.y0 <= header_boundary:
242244
score += 50.0
243-
logger.debug(f"{candidate.masked}: +50 (header area)")
245+
logger.debug("%s: +50 (header area)", candidate.masked)
244246

245247
# Score 2: Y-position preference (+0 to +30 points, higher = better)
246248
# Normalize Y position (0 = top, 1 = bottom)
247249
y_ratio = candidate.bbox.y0 / page_height
248250
position_score = 30.0 * (1.0 - y_ratio) # Invert so top gets high score
249251
score += position_score
250252
logger.debug(
251-
f"{candidate.masked}: +{position_score:.1f} "
252-
f"(Y-position {candidate.bbox.y0:.1f}/{page_height:.1f})"
253+
"%s: +%.1f (Y-position %.1f/%.1f)",
254+
candidate.masked,
255+
position_score,
256+
candidate.bbox.y0,
257+
page_height,
253258
)
254259

255260
# Score 3: Near "IBAN" label (future enhancement)
256261
# TODO: Look for "IBAN" text nearby
257262

258263
candidate.confidence_score = score
259-
logger.debug(f"{candidate.masked}: Total score = {score:.1f}")
264+
logger.debug("%s: Total score = %.1f", candidate.masked, score)
260265

261266
# Sort by score (highest first)
262267
candidates_sorted = sorted(
@@ -280,8 +285,10 @@ def select_best_iban(self, candidates: list[IBANCandidate]) -> IBANCandidate | N
280285

281286
best = candidates[0]
282287
logger.info(
283-
f"Selected IBAN: {best.masked} (score: {best.confidence_score:.1f}, "
284-
f"location: {best.bbox})"
288+
"Selected IBAN: %s (score: %.1f, location: %s)",
289+
best.masked,
290+
best.confidence_score,
291+
best.bbox,
285292
)
286293

287294
return best

0 commit comments

Comments
 (0)