@@ -97,21 +97,32 @@ def normalize_text(text):
9797 raise
9898
def convert_image_to_png(image_path):
    """Validate an image file and convert it to PNG when it is WebP.

    The file is first integrity-checked, then its dimensions are inspected
    from the header, and only then is the pixel data decoded. WebP images
    are re-encoded to a temporary PNG file; every other format is passed
    through unchanged.

    Args:
        image_path: Filesystem path of the image to validate.

    Returns:
        The path to embed: ``image_path`` itself for non-WebP images, or the
        path of a newly created temporary PNG for WebP images (the caller is
        responsible for deleting it). ``None`` if the file is unreadable,
        invalid, larger than 5000 pixels on either side, or conversion fails.
    """
    temp_file = None
    try:
        if not os.access(image_path, os.R_OK):
            print(f"Error: No read permission for image {image_path}")
            return None

        # verify() leaves the image object unusable, so it gets a dedicated
        # handle that is closed before the file is opened again for real use.
        with Image.open(image_path) as probe:
            probe.verify()

        with Image.open(image_path) as img:
            image_format = img.format
            image_size = img.size
            print(f"Image {image_path}: format={image_format}, size={image_size}")

            # Reject oversized images *before* decoding: size comes from the
            # header, whereas load() allocates the full pixel buffer.
            if max(image_size) > 5000:
                print(f"Error: Image {image_path} too large ({image_size}), skipping")
                return None

            # Force a full decode to surface truncated or corrupt pixel data.
            img.load()

            # format can be None for images Pillow cannot attribute to a
            # file format; treat that as "not WebP" instead of crashing.
            if (image_format or '').lower() == 'webp':
                temp_dir = '/var/www/bd-news/storage/app/public'
                # exist_ok avoids the check-then-create race between workers.
                os.makedirs(temp_dir, exist_ok=True)
                temp_file = os.path.join(temp_dir, f"temp_{uuid.uuid4().hex}.png")
                img.save(temp_file, 'PNG')
                print(f"Converted WebP to PNG: {image_path} -> {temp_file}")
                return temp_file

        return image_path
    except Exception as e:
        # Boundary handler: callers rely on None for any failure.
        print(f"Error validating or converting image {image_path}: {str(e)}")
        # Do not leave a partially written PNG behind if save() failed.
        if temp_file and os.path.exists(temp_file):
            try:
                os.unlink(temp_file)
            except OSError:
                pass
        return None
116127
117128def get_style_safe (doc , style_name , fallback_style = 'Normal' ):
@@ -219,6 +230,7 @@ def process_element(element, doc, current_para=None, indent=0, html_path=None):
219230 elif element .name == 'img' :
220231 src = element .get ('src' )
221232 if not src :
233+ print (f"Warning: Image tag missing src attribute" )
222234 return current_para
223235 original_src = src
224236 if src .startswith (('http://' , 'https://' )):
@@ -236,11 +248,14 @@ def process_element(element, doc, current_para=None, indent=0, html_path=None):
236248 src = urllib .parse .unquote (src )
237249 src = src .replace ('/storage/app/public/storage/' , '/storage/app/public/' )
238250 if not os .path .exists (src ):
251+ print (f"Warning: Image file not found at { src } (original: { original_src } )" )
239252 return current_para
240253 if not os .access (src , os .R_OK ):
254+ print (f"Error: No read permission for image { src } " )
241255 return current_para
242256 embed_path = convert_image_to_png (src )
243257 if not embed_path :
258+ print (f"Error: Failed to prepare image { src } for embedding" )
244259 return current_para
245260 if current_para is None :
246261 current_para = doc .add_paragraph ()
@@ -250,18 +265,24 @@ def process_element(element, doc, current_para=None, indent=0, html_path=None):
250265 run = current_para .add_run ()
251266 style = parse_style (element .get ('style' , '' ))
252267 width = style .get ('width' , element .get ('width' , '300px' ))
253- width_match = re .match (r'(\d+)\s*px' , width )
254- width_inches = Inches (3 )
268+ print (f"Processing image { src } : width attribute={ width } " )
269+ width_match = re .match (r'(\d+)\s*px' , str (width ))
270+ width_inches = Inches (3 ) # Default width
255271 if width_match :
256272 width_inches = Inches (float (width_match .group (1 )) / 96 )
273+ else :
274+ print (f"Warning: Invalid width format '{ width } ' for image { src } , using default 3 inches" )
257275 run .add_picture (embed_path , width = width_inches )
276+ print (f"Successfully embedded image: { src } " )
258277 except Exception as e :
259- run = current_para .add_run (f"[Image failed to embed: { src } { embed_path } { os .path .exists (embed_path )} ]" )
278+ print (f"Error embedding image { src } : { str (e )} " )
279+ run = current_para .add_run (f"[Image failed to embed: { os .path .basename (src )} | Error: { str (e )} | Path exists: { os .path .exists (embed_path )} ]" )
260280 run .font .color .rgb = RGBColor (255 , 0 , 0 ) # Red text for error
261281 finally :
262282 if embed_path != src and os .path .exists (embed_path ):
263283 try :
264284 os .unlink (embed_path )
285+ print (f"Deleted temporary file: { embed_path } " )
265286 except Exception as e :
266287 print (f"Error deleting temporary file { embed_path } : { str (e )} " )
267288 elif element .name == 'table' :
0 commit comments