@@ -44,7 +44,7 @@ def analyze_columns(
4444 Returns:
4545 Dictionary mapping column names to (x_min, x_max) tuples
4646 """
47- logger .debug (f "Analyzing columns in table { table_bbox } " )
47+ logger .debug ("Analyzing columns in table %s" , table_bbox )
4848
4949 # Extract words within table bbox
5050 words = page .extract_words (
@@ -65,14 +65,14 @@ def analyze_columns(
6565 logger .warning ("No words found in table region" )
6666 return {}
6767
68- logger .debug (f "Found { len ( table_words ) } words in table region" )
68+ logger .debug ("Found %s words in table region" , len ( table_words ) )
6969
7070 # Find header row first
7171 header_words = self ._find_header_words (table_words , table_bbox )
7272
7373 if header_words :
7474 # Strategy: Use header words to define columns
75- logger .debug (f "Using { len ( header_words ) } header words to define columns" )
75+ logger .debug ("Using %s header words to define columns" , len ( header_words ) )
7676 boundaries , column_names = self ._create_columns_from_headers (
7777 header_words , table_bbox
7878 )
@@ -83,16 +83,16 @@ def analyze_columns(
8383 boundaries = self ._detect_boundaries_from_clusters (clusters )
8484 column_names = [f"Column{ i + 1 } " for i in range (len (boundaries ))]
8585
86- logger .debug (f "Detected { len ( boundaries ) } column boundaries" )
86+ logger .debug ("Detected %s column boundaries" , len ( boundaries ) )
8787
8888 # Build result dictionary
8989 columns = {}
9090 for i , (x_min , x_max ) in enumerate (boundaries ):
9191 column_name = column_names [i ] if i < len (column_names ) else f"Column{ i + 1 } "
9292 columns [column_name ] = (x_min , x_max )
93- logger .debug (f " { column_name } : ({ x_min : .1f} , { x_max : .1f} )" )
93+ logger .debug (" %s : (% .1f, % .1f)" , column_name , x_min , x_max )
9494
95- logger .info (f "Detected { len (columns )} columns" )
95+ logger .info ("Detected %s columns" , len (columns ))
9696 return columns
9797
9898 def _cluster_x_coordinates (self , words : list [dict ]) -> list [float ]:
@@ -130,7 +130,7 @@ def _cluster_x_coordinates(self, words: list[dict]) -> list[float]:
130130 clusters .append (cluster_center )
131131
132132 logger .debug (
133- f "Clustered { len ( x_coords ) } X-coords into { len (clusters ) } clusters"
133+ "Clustered %s X-coords into %s clusters" , len (x_coords ), len ( clusters )
134134 )
135135 return sorted (clusters )
136136
@@ -201,8 +201,10 @@ def _find_header_words(
201201 header_words = [word for word in table_words if word ["top" ] <= header_threshold ]
202202
203203 logger .debug (
204- f"Found { len (header_words )} words in header row "
205- f"(Y={ min_y :.1f} , threshold={ header_threshold :.1f} )"
204+ "Found %s words in header row (Y=%.1f, threshold=%.1f)" ,
205+ len (header_words ),
206+ min_y ,
207+ header_threshold ,
206208 )
207209 return header_words
208210
@@ -272,8 +274,11 @@ def _assign_column_names(
272274 column_names [best_col_idx ] = name
273275
274276 logger .debug (
275- f"Column { best_col_idx } [{ boundaries [best_col_idx ][0 ]:.1f} , "
276- f"{ boundaries [best_col_idx ][1 ]:.1f} ]: '{ name } '"
277+ "Column %s [%.1f, %.1f]: '%s'" ,
278+ best_col_idx ,
279+ boundaries [best_col_idx ][0 ],
280+ boundaries [best_col_idx ][1 ],
281+ name ,
277282 )
278283
279284 # Fill in any unassigned columns with generic names
@@ -284,8 +289,11 @@ def _assign_column_names(
284289 name = f"Column{ i + 1 } "
285290 result_names .append (name )
286291 logger .debug (
287- f"Column { i } [{ boundaries [i ][0 ]:.1f} , { boundaries [i ][1 ]:.1f} ]: "
288- f"'{ name } ' (no match)"
292+ "Column %s [%.1f, %.1f]: '%s' (no match)" ,
293+ i ,
294+ boundaries [i ][0 ],
295+ boundaries [i ][1 ],
296+ name ,
289297 )
290298 else :
291299 result_names .append (name_val )
@@ -323,11 +331,16 @@ def _resolve_overlapping_boundaries(
323331 # Leave 1px gap to avoid extraction ambiguity
324332 new_x_max = next_x_min - 1
325333 logger .debug (
326- f"Overlap detected: Column { i } [{ x_min :.1f} , { x_max :.1f} ] "
327- f"overlaps Column { i + 1 } [{ next_x_min :.1f} , { next_x_max :.1f} ]"
334+ "Overlap detected: Column %s [%.1f, %.1f] overlaps Column %s [%.1f, %.1f]" ,
335+ i ,
336+ x_min ,
337+ x_max ,
338+ i + 1 ,
339+ next_x_min ,
340+ next_x_max ,
328341 )
329342 logger .debug (
330- f " Adjusting Column { i } x_max: { x_max : .1f} -> { new_x_max : .1f} "
343+ " Adjusting Column %s x_max: % .1f -> % .1f" , i , x_max , new_x_max
331344 )
332345 x_max = new_x_max
333346
@@ -364,7 +377,9 @@ def _create_columns_from_headers(
364377 word_groups .append (current_group )
365378
366379 logger .debug (
367- f"Grouped { len (header_words )} header words into { len (word_groups )} columns"
380+ "Grouped %s header words into %s columns" ,
381+ len (header_words ),
382+ len (word_groups ),
368383 )
369384
370385 # Create boundaries and names from word groups
@@ -390,7 +405,7 @@ def _create_columns_from_headers(
390405 name = " " .join (w ["text" ] for w in group )
391406 column_names .append (name )
392407
393- logger .debug (f " Column: '{ name } ' at [{ x_min : .1f} , { x_max : .1f} ]" )
408+ logger .debug (" Column: '%s ' at [% .1f, % .1f]" , name , x_min , x_max )
394409
395410 # Resolve overlaps by adjusting boundaries
396411 boundaries = self ._resolve_overlapping_boundaries (boundaries )
@@ -399,7 +414,7 @@ def _create_columns_from_headers(
399414 for i , (x_min , x_max ) in enumerate (boundaries ):
400415 if i < len (column_names ):
401416 logger .debug (
402- f " Adjusted '{ column_names [ i ] } ': [{ x_min : .1f} , { x_max : .1f} ]"
417+ " Adjusted '%s ': [% .1f, % .1f]" , column_names [ i ], x_min , x_max
403418 )
404419
405420 return boundaries , column_names
0 commit comments