Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions misaki/cutlet.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,10 @@
from dataclasses import dataclass
from fugashi import Tagger
from typing import Tuple
import importlib.resources
try:
from importlib.resources import files
except ImportError:
from importlib_resources import files
import jaconv
import mojimoji
import re
Expand Down Expand Up @@ -230,8 +233,7 @@
assert all(chr(i) == kk[0] for i, kk in zip(range(12784, 12800), Katakana_Phonetic_Extensions))
Katakana_Phonetic_Extensions = {kk[0]: kk[1] for kk in Katakana_Phonetic_Extensions}

with importlib.resources.open_text(data, 'ja_words.txt') as r:
JA_WORDS = frozenset({line.strip() for line in r})
JA_WORDS = frozenset({line.strip() for line in files(data).joinpath('ja_words.txt').read_text(encoding='utf-8').splitlines()})

def add_dakuten(kk):
"""Given a kana (single-character string), add a dakuten."""
Expand Down
11 changes: 6 additions & 5 deletions misaki/en.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,10 @@
from dataclasses import dataclass, replace
from num2words import num2words
from typing import List, Optional, Tuple, Union
import importlib.resources
try:
from importlib.resources import files
except ImportError:
from importlib_resources import files
import json
import numpy as np
import re
Expand Down Expand Up @@ -142,10 +145,8 @@ def __init__(self, british):
self.cap_stresses = (0.5, 2)
self.golds = {}
self.silvers = {}
with importlib.resources.open_text(data, f"{'gb' if british else 'us'}_gold.json") as r:
self.golds = Lexicon.grow_dictionary(json.load(r))
with importlib.resources.open_text(data, f"{'gb' if british else 'us'}_silver.json") as r:
self.silvers = Lexicon.grow_dictionary(json.load(r))
self.golds = Lexicon.grow_dictionary(json.loads(files(data).joinpath(f"{'gb' if british else 'us'}_gold.json").read_text(encoding='utf-8')))
self.silvers = Lexicon.grow_dictionary(json.loads(files(data).joinpath(f"{'gb' if british else 'us'}_silver.json").read_text(encoding='utf-8')))
assert all(isinstance(v, str) or isinstance(v, dict) for v in self.golds.values())
vocab = GB_VOCAB if british else US_VOCAB
for vs in self.golds.values():
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ classifiers = [
requires-python = ">=3.8, <3.14"
dependencies = [
"addict",
"importlib-resources; python_version < '3.9'",
"pip>=25.0.1",
# ^ Spacy needs pip within Python, uv lacks pip by default
"regex",
Expand Down
62 changes: 62 additions & 0 deletions tests/test_resource_loading.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
import sys
import types


def _install_stubs(monkeypatch):
    """Register stand-in modules for the third-party imports in ``sys.modules``.

    Every stub is installed through ``monkeypatch.setitem`` on
    ``sys.modules``. The stubs carry only the attributes listed below.

    Args:
        monkeypatch: Object exposing ``setitem(mapping, key, value)``
            (in the test below, pytest's ``monkeypatch`` fixture).
    """

    class Dict(dict):
        # Attribute access falls back to ``dict.get``; missing keys give None.
        def __getattr__(self, key):
            return self.get(key)

    # Module name -> attributes to attach. Dict insertion order is the
    # registration order.
    stub_table = {
        "addict": {"Dict": Dict},
        "num2words": {"num2words": lambda *args, **kwargs: "stub"},
        "spacy": {"load": lambda *args, **kwargs: None},
        "regex": {
            "compile": lambda *args, **kwargs: None,
            "match": lambda *args, **kwargs: None,
            "findall": lambda *args, **kwargs: [],
            "sub": lambda *args, **kwargs: "",
        },
        "transformers": {"BartForConditionalGeneration": object},
        "torch": {},
        "fugashi": {"Tagger": object},
        "jaconv": {"kata2hira": lambda text: text},
        "mojimoji": {
            "zen_to_han": lambda text, kana=False: text,
            "han_to_zen": lambda text, digit=False, ascii=False: text,
        },
    }

    for module_name, attributes in stub_table.items():
        stub = types.ModuleType(module_name)
        for attr_name, attr_value in attributes.items():
            setattr(stub, attr_name, attr_value)
        monkeypatch.setitem(sys.modules, module_name, stub)


def test_resource_loading_uses_files_api(monkeypatch):
    """Import ``misaki.cutlet`` and ``misaki.en`` with ``open_text`` booby-trapped.

    The third-party dependencies are stubbed first, then
    ``importlib.resources.open_text`` is replaced with a callable that raises
    ``AssertionError``. The test passes when both modules import without
    triggering that callable.
    """
    import importlib
    import importlib.resources

    _install_stubs(monkeypatch)

    def _forbidden(*_args, **_kwargs):
        raise AssertionError("open_text should not be used")

    # raising=False: do not fail if the attribute is absent on this interpreter.
    monkeypatch.setattr(importlib.resources, "open_text", _forbidden, raising=False)

    for module_name in ("misaki.cutlet", "misaki.en"):
        importlib.import_module(module_name)