hexgrad · thewh1teagle · Jun 23, 2025
diff --git a/README.md b/README.md
@@ -72,6 +72,10 @@ The first gen Chinese tokenizer uses jieba to cut, pypinyin, and pinyin-to-ipa.
 ### Vietnamese
 - https://github.com/v-nhandt21/Viphoneme
 
+### Hebrew
+
+- https://github.com/thewh1teagle/phonikud
+
 ### TODO
 - [ ] Data: Compress [data](https://github.com/hexgrad/misaki/tree/main/misaki/data) (no need for indented json) and eliminate redundancy between gold and silver dictionaries.
 - [ ] Fallbacks: Train seq2seq fallback models on dictionaries using [this notebook](https://github.com/Kyubyong/nlp_made_easy/blob/master/PyTorch%20seq2seq%20template%20based%20on%20the%20g2p%20task.ipynb).

diff --git a/misaki/he.py b/misaki/he.py
@@ -1,22 +1,14 @@
 """
-Phonemize Hebrew using mishkal package from https://github.com/thewh1teagle/mishkal
+Phonemize Hebrew using the phonikud package from https://github.com/thewh1teagle/phonikud
 """
 
-import mishkal
+import phonikud
 
 class HEG2P:
-    def __call__(self, text: str, preserve_punctuation = True, preserve_stress = True):
+    def __call__(self, text: str, preserve_punctuation = True, preserve_stress = True, **kwargs):
         """
         Convert Hebrew text to IPA
-        Text is expected to be with diacritics (niqqud)
-        Enable debug to return Word objects that contais detailed conversion information
+        Text is expected to include enhanced diacritics (nikud)
         """
 
-        return mishkal.phonemize(text, preserve_punctuation=preserve_punctuation, preserve_stress=preserve_stress)
-
-    def get_phonene_set(self):
-        """
-        Return list with exact phonemes used in mishkal package
-        """
-
-        return mishkal.get_phoneme_set()
+        return phonikud.phonemize(text, preserve_punctuation=preserve_punctuation, preserve_stress=preserve_stress, **kwargs)
diff --git a/pyproject.toml b/pyproject.toml
@@ -29,7 +29,7 @@ ja = ["fugashi", "jaconv", "mojimoji", "unidic", "pyopenjtalk"]
 ko = ["jamo", "nltk"]
 zh = ["jieba", "ordered-set", "pypinyin", "cn2an", "pypinyin-dict"]
 vi = ["num2words", "spacy", "spacy-curated-transformers", "underthesea"]
-he = ["mishkal-hebrew>=0.3.2"]
+he = ["phonikud>=0.3.9"]
 
 [build-system]
 requires = ["hatchling"]