diff --git a/misaki/cutlet.py b/misaki/cutlet.py
index 438d95b..4a509c4 100644
--- a/misaki/cutlet.py
+++ b/misaki/cutlet.py
@@ -5,7 +5,10 @@
 from dataclasses import dataclass
 from fugashi import Tagger
 from typing import Tuple
-import importlib.resources
+try:
+    from importlib.resources import files
+except ImportError:
+    from importlib_resources import files
 import jaconv
 import mojimoji
 import re
@@ -230,8 +233,7 @@
 assert all(chr(i) == kk[0] for i, kk in zip(range(12784, 12800), Katakana_Phonetic_Extensions))
 Katakana_Phonetic_Extensions = {kk[0]: kk[1] for kk in Katakana_Phonetic_Extensions}
 
-with importlib.resources.open_text(data, 'ja_words.txt') as r:
-    JA_WORDS = frozenset({line.strip() for line in r})
+JA_WORDS = frozenset({line.strip() for line in files(data).joinpath('ja_words.txt').read_text(encoding='utf-8').splitlines()})
 
 def add_dakuten(kk):
     """Given a kana (single-character string), add a dakuten."""
diff --git a/misaki/en.py b/misaki/en.py
index 222c170..0960ec4 100644
--- a/misaki/en.py
+++ b/misaki/en.py
@@ -3,7 +3,10 @@
 from dataclasses import dataclass, replace
 from num2words import num2words
 from typing import List, Optional, Tuple, Union
-import importlib.resources
+try:
+    from importlib.resources import files
+except ImportError:
+    from importlib_resources import files
 import json
 import numpy as np
 import re
@@ -142,10 +145,8 @@ def __init__(self, british):
         self.cap_stresses = (0.5, 2)
         self.golds = {}
         self.silvers = {}
-        with importlib.resources.open_text(data, f"{'gb' if british else 'us'}_gold.json") as r:
-            self.golds = Lexicon.grow_dictionary(json.load(r))
-        with importlib.resources.open_text(data, f"{'gb' if british else 'us'}_silver.json") as r:
-            self.silvers = Lexicon.grow_dictionary(json.load(r))
+        self.golds = Lexicon.grow_dictionary(json.loads(files(data).joinpath(f"{'gb' if british else 'us'}_gold.json").read_text(encoding='utf-8')))
+        self.silvers = Lexicon.grow_dictionary(json.loads(files(data).joinpath(f"{'gb' if british else 'us'}_silver.json").read_text(encoding='utf-8')))
         assert all(isinstance(v, str) or isinstance(v, dict) for v in self.golds.values())
         vocab = GB_VOCAB if british else US_VOCAB
         for vs in self.golds.values():
diff --git a/pyproject.toml b/pyproject.toml
index e0ee5aa..b9a8f8f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -18,6 +18,7 @@ classifiers = [
 requires-python = ">=3.8, <3.14"
 dependencies = [
   "addict",
+  "importlib-resources; python_version < '3.9'",
   "pip>=25.0.1",
   # ^ Spacy needs pip within Python, uv lacks pip by default
   "regex",
diff --git a/tests/test_resource_loading.py b/tests/test_resource_loading.py
new file mode 100644
index 0000000..292701b
--- /dev/null
+++ b/tests/test_resource_loading.py
@@ -0,0 +1,87 @@
+import sys
+import types
+
+
+def _install_stubs(monkeypatch):
+    addict = types.ModuleType("addict")
+
+    class Dict(dict):
+        def __getattr__(self, key):
+            return self.get(key)
+
+    addict.Dict = Dict
+    monkeypatch.setitem(sys.modules, "addict", addict)
+
+    num2words = types.ModuleType("num2words")
+    num2words.num2words = lambda *args, **kwargs: "stub"
+    monkeypatch.setitem(sys.modules, "num2words", num2words)
+
+    spacy = types.ModuleType("spacy")
+    spacy.load = lambda *args, **kwargs: None
+    monkeypatch.setitem(sys.modules, "spacy", spacy)
+
+    regex = types.ModuleType("regex")
+    regex.compile = lambda *args, **kwargs: None
+    regex.match = lambda *args, **kwargs: None
+    regex.findall = lambda *args, **kwargs: []
+    regex.sub = lambda *args, **kwargs: ""
+    monkeypatch.setitem(sys.modules, "regex", regex)
+
+    transformers = types.ModuleType("transformers")
+    transformers.BartForConditionalGeneration = object
+    monkeypatch.setitem(sys.modules, "transformers", transformers)
+
+    torch = types.ModuleType("torch")
+    monkeypatch.setitem(sys.modules, "torch", torch)
+
+    fugashi = types.ModuleType("fugashi")
+    fugashi.Tagger = object
+    monkeypatch.setitem(sys.modules, "fugashi", fugashi)
+
+    jaconv = types.ModuleType("jaconv")
+    jaconv.kata2hira = lambda text: text
+    monkeypatch.setitem(sys.modules, "jaconv", jaconv)
+
+    mojimoji = types.ModuleType("mojimoji")
+    mojimoji.zen_to_han = lambda text, kana=False: text
+    mojimoji.han_to_zen = lambda text, digit=False, ascii=False: text
+    monkeypatch.setitem(sys.modules, "mojimoji", mojimoji)
+
+
+def _is_misaki_module(name):
+    """Return True for the ``misaki`` package and any of its submodules."""
+    return name == "misaki" or name.startswith("misaki.")
+
+
+def test_resource_loading_uses_files_api(monkeypatch):
+    """Import misaki.cutlet and misaki.en with ``open_text`` booby-trapped."""
+    _install_stubs(monkeypatch)
+
+    import importlib.resources
+
+    def boom(*args, **kwargs):
+        raise AssertionError("open_text should not be used")
+
+    monkeypatch.setattr(importlib.resources, "open_text", boom, raising=False)
+
+    # Drop cached misaki modules so the imports below really execute the
+    # module bodies; a cached module would make this test pass vacuously.
+    cached = {
+        name: module
+        for name, module in sys.modules.items()
+        if _is_misaki_module(name)
+    }
+    for name in cached:
+        del sys.modules[name]
+
+    try:
+        import misaki.cutlet  # noqa: F401
+        import misaki.en  # noqa: F401
+        # NOTE(review): the en.py change sits inside an ``__init__``, so
+        # importing misaki.en alone does not run the new JSON loading.
+    finally:
+        # Modules imported here are bound to the stubs above; evict them and
+        # restore the originals so later tests do not inherit stubbed code.
+        for name in [n for n in sys.modules if _is_misaki_module(n)]:
+            del sys.modules[name]
+        sys.modules.update(cached)