Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions tkhtmlview/html_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from collections import OrderedDict
import requests
from io import BytesIO
import base64


# __________________________________________________________________________________________________
Expand Down Expand Up @@ -543,6 +544,24 @@ def handle_starttag(self, tag, attrs):
except:

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

issue (bug_risk): Bare except statements are used, which can hide unexpected errors.

Catching all exceptions makes it harder to identify and address real issues. Please catch only the relevant exceptions to improve error handling.

pass

if attrs[HTML.Attrs.SRC].startswith(("data:image/jpeg;base64,")):
try:
image = Image.open(
BytesIO(base64.b64decode(attrs[HTML.Attrs.SRC][23:].encode("utf-8")))

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

suggestion: The code uses hardcoded offsets for slicing the base64 string.

Hardcoded offsets are brittle and may fail if the prefix changes or new formats are introduced. Use string splitting to extract the base64 data for better reliability.

)
self.cached_images[attrs[HTML.Attrs.SRC]] = deepcopy(image)
except:
pass

if attrs[HTML.Attrs.SRC].startswith(("data:image/png;base64,", "data:image/gif;base64,")):
Comment on lines +547 to +556

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

suggestion: The code for handling base64 image decoding is duplicated for each image type.

Refactor the base64 decoding logic into a shared helper to simplify maintenance and future extensions.

Suggested implementation:

from collections import OrderedDict
import requests
from io import BytesIO
import base64
from PIL import Image
from copy import deepcopy

def decode_base64_image(src: str) -> "Image.Image|None":
    """Decode a base64 ``data:`` URI into a PIL image.

    Recognizes the ``data:image/jpeg;base64,``, ``data:image/png;base64,``
    and ``data:image/gif;base64,`` prefixes.

    Args:
        src: The value of an image ``src`` attribute.

    Returns:
        The object returned by ``Image.open`` for the decoded payload, or
        ``None`` when *src* is not a string, does not start with one of the
        supported prefixes, or cannot be decoded/opened.
    """
    if not isinstance(src, str):
        return None

    supported_prefixes = (
        "data:image/jpeg;base64,",
        "data:image/png;base64,",
        "data:image/gif;base64,",
    )
    for prefix in supported_prefixes:
        if not src.startswith(prefix):
            continue

        # Slice by the prefix's own length so the offset can never drift out
        # of sync with the prefix text (no hand-maintained 22/23 constants).
        payload = src[len(prefix):]

        try:
            # binascii.Error (bad padding / length) is a ValueError subclass.
            image_data = base64.b64decode(payload.encode("utf-8"))
        except ValueError:
            return None

        try:
            return Image.open(BytesIO(image_data))
        except Exception:
            # Deliberately broad: this is a best-effort decode and a failure
            # to open the image must not propagate out of HTML parsing.
            return None

    return None
            image = decode_base64_image(attrs[HTML.Attrs.SRC])
            if image is not None:
                self.cached_images[attrs[HTML.Attrs.SRC]] = deepcopy(image)

try:
image = Image.open(
BytesIO(base64.b64decode(attrs[HTML.Attrs.SRC][22:].encode("utf-8")))
)
self.cached_images[attrs[HTML.Attrs.SRC]] = deepcopy(image)
except:
pass

if attrs[HTML.Attrs.SRC] in self.cached_images.keys():
image = deepcopy(self.cached_images[attrs[HTML.Attrs.SRC]])
elif os.path.exists(attrs[HTML.Attrs.SRC]):
Expand Down