Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,11 @@ Simply install with `go get`:
s := kana.RomajiToHiragana("kanji") // -> かんじ
s = kana.RomajiToKatakana("banana") // -> バナナ

### Convert between hiragana and katakana:

s := kana.HiraganaToKatakana("ひらがな") // -> ヒラガナ
s = kana.KatakanaToHiragana("カタカナ") // -> かたかな

### Tell whether strings are written with kana, kanji or Latin characters:

kana.IsLatin("banana") // -> true
Expand All @@ -33,6 +38,21 @@ Simply install with `go get`:
kana.IsKanji("banana") // -> false
kana.IsKanji("減少") // -> true

### Check if strings contain kana, kanji or Latin characters:

kana.ContainsHiragana("hello ひらがな") // -> true
kana.ContainsKatakana("hello カタカナ") // -> true
kana.ContainsKana("hello ひら") // -> true
kana.ContainsKanji("hello 漢字") // -> true

### Count characters by type:

kana.CountHiragana("ひらがな") // -> 4
kana.CountKatakana("カタカナ") // -> 4
kana.CountKana("ひらカタ") // -> 4
kana.CountKanji("漢字") // -> 2
kana.CountRomaji("hello") // -> 5

### Normalize a romaji string to a standardized form (from the form given by Google Translate, for example):

kana.NormalizeRomaji("Myūjikku") // -> myu-jikku
Expand Down
3 changes: 3 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
module github.com/gojp/kana

go 1.23
148 changes: 136 additions & 12 deletions kana.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,15 @@ import (
"unicode/utf8"
)

const (
// Special kana characters, named once here so the conversion functions
// do not repeat the raw string literals.
// smallTsuHiragana is the hiragana small tsu that RomajiToHiragana hands to replaceTsus.
smallTsuHiragana = "っ"
// smallTsuKatakana is the katakana small tsu that RomajiToKatakana hands to replaceTsus.
smallTsuKatakana = "ッ"
// nHiragana is the hiragana n that RomajiToHiragana hands to replaceNs.
nHiragana = "ん"
// nKatakana is the katakana n that RomajiToKatakana hands to replaceNs.
nKatakana = "ン"
// longVowelMark is the kana long vowel mark; it is exchanged with "-" when
// converting to and from romaji.
longVowelMark = "ー"
)

var (
consonants = []string{"b", "c", "d", "f", "g", "h", "j", "k", "l", "m", "p", "r", "s", "t", "w", "z"}

Expand Down Expand Up @@ -54,6 +63,10 @@ func Initialize() {

// KanaToRomaji converts a kana string to its romaji form
func KanaToRomaji(kana string) (romaji string) {
if kana == "" {
return ""
}

// unfortunate hack to deal with double n's
romaji = hiraganaRe.ReplaceAllString(kana, "nn$1")
romaji = katakanaRe.ReplaceAllString(romaji, "nn$1")
Expand All @@ -63,7 +76,7 @@ func KanaToRomaji(kana string) (romaji string) {
// do some post-processing for the tsu and stripe characters
// maybe a bit of a hacky solution - how can we improve?
// (they act more like punctuation)
tsus := []string{"っ", "ッ"}
tsus := []string{smallTsuHiragana, smallTsuKatakana}
for _, tsu := range tsus {
if strings.Index(romaji, tsu) > -1 {
for _, c := range romaji {
Expand All @@ -83,12 +96,11 @@ func KanaToRomaji(kana string) (romaji string) {
}
}

line := "ー"
for i := strings.Index(romaji, line); i > -1; i = strings.Index(romaji, line) {
for i := strings.Index(romaji, longVowelMark); i > -1; i = strings.Index(romaji, longVowelMark) {
if i > 0 {
romaji = strings.Replace(romaji, line, "-", 1)
romaji = strings.Replace(romaji, longVowelMark, "-", 1)
} else {
romaji = strings.Replace(romaji, line, "", 1)
romaji = strings.Replace(romaji, longVowelMark, "", 1)
}
}
return romaji
Expand All @@ -108,24 +120,35 @@ func replaceNs(romaji string, n string) (result string) {

// RomajiToHiragana converts a romaji string to its hiragana form
func RomajiToHiragana(romaji string) (hiragana string) {
romaji = strings.Replace(romaji, "-", "ー", -1)
romaji = replaceTsus(romaji, "っ")
romaji = replaceNs(romaji, "ん")
if romaji == "" {
return ""
}

romaji = strings.Replace(romaji, "-", longVowelMark, -1)
romaji = replaceTsus(romaji, smallTsuHiragana)
romaji = replaceNs(romaji, nHiragana)
hiragana = romajiToHiraganaTrie.convert(romaji)
return hiragana
}

// RomajiToKatakana converts a romaji string to its katakana form.
//
// An empty input yields an empty result without touching the conversion
// trie. Otherwise the string is pre-processed in three steps before the
// trie lookup: every "-" becomes the long vowel mark, replaceTsus is applied
// with the katakana small tsu, and replaceNs is applied with the katakana n.
func RomajiToKatakana(romaji string) (katakana string) {
	if romaji == "" {
		return ""
	}

	romaji = strings.ReplaceAll(romaji, "-", longVowelMark)
	// convert double consonants to little tsus first
	romaji = replaceTsus(romaji, smallTsuKatakana)
	romaji = replaceNs(romaji, nKatakana)
	return romajiToKatakanaTrie.convert(romaji)
}

func isChar(s string, rangeTable []*unicode.RangeTable) bool {
if s == "" {
return false
}
runeForm := []rune(s)
for _, r := range runeForm {
if !unicode.IsOneOf(rangeTable, r) {
Expand Down Expand Up @@ -155,7 +178,7 @@ func IsKatakana(s string) bool {
return isChar(s, []*unicode.RangeTable{unicode.Katakana, unicode.Hyphen, unicode.Diacritic})
}

// IsKanji returns true if the string contains only kanji.
// The check is delegated to isChar with the unicode.Ideographic range table.
func IsKanji(s string) bool {
return isChar(s, []*unicode.RangeTable{unicode.Ideographic})
}
Expand Down Expand Up @@ -183,3 +206,104 @@ func NormalizeRomaji(s string) (romaji string) {

return romaji
}

// HiraganaToKatakana converts the hiragana characters in a string to their
// katakana counterparts.
//
// Only runes from U+3041 through U+3096 are converted. Every other rune
// (Latin letters, kanji, existing katakana, the long vowel mark,
// punctuation) is copied through unchanged. An empty input yields an empty
// string.
func HiraganaToKatakana(hiragana string) string {
	// Distance between a hiragana code point and the katakana code point the
	// conversion maps it to.
	const shift = '\u30a0' - '\u3040'

	// strings.Map builds the result in a single pass and returns the input
	// as-is when nothing needs converting.
	return strings.Map(func(r rune) rune {
		if r >= '\u3041' && r <= '\u3096' {
			return r + shift
		}
		return r
	}, hiragana)
}

// KatakanaToHiragana converts the katakana characters in a string to their
// hiragana counterparts.
//
// Only runes from U+30A1 through U+30F6 are converted. Every other rune
// (Latin letters, kanji, existing hiragana, the long vowel mark, katakana
// outside that range) is copied through unchanged. An empty input yields an
// empty string.
func KatakanaToHiragana(katakana string) string {
	// Distance between a katakana code point and the hiragana code point the
	// conversion maps it to.
	const shift = '\u30a0' - '\u3040'

	// strings.Map builds the result in a single pass and returns the input
	// as-is when nothing needs converting.
	return strings.Map(func(r rune) rune {
		if r >= '\u30a1' && r <= '\u30f6' {
			return r - shift
		}
		return r
	}, katakana)
}

// containsChar reports whether at least one rune of s belongs to any of the
// given range tables. An empty string never matches.
func containsChar(s string, rangeTable []*unicode.RangeTable) bool {
	inTables := func(ch rune) bool {
		return unicode.In(ch, rangeTable...)
	}
	// IndexFunc yields -1 when no rune qualifies, which covers "" as well.
	return strings.IndexFunc(s, inTables) >= 0
}

// ContainsKana reports whether s holds at least one rune from the
// unicode.Hiragana or unicode.Katakana tables.
func ContainsKana(s string) bool {
	kanaTables := []*unicode.RangeTable{unicode.Hiragana, unicode.Katakana}
	return containsChar(s, kanaTables)
}

// ContainsHiragana reports whether s holds at least one rune from the
// unicode.Hiragana table.
func ContainsHiragana(s string) bool {
	hiraganaTables := []*unicode.RangeTable{unicode.Hiragana}
	return containsChar(s, hiraganaTables)
}

// ContainsKatakana reports whether s holds at least one rune from the
// unicode.Katakana table.
func ContainsKatakana(s string) bool {
	katakanaTables := []*unicode.RangeTable{unicode.Katakana}
	return containsChar(s, katakanaTables)
}

// ContainsKanji reports whether s holds at least one rune from the
// unicode.Ideographic table.
func ContainsKanji(s string) bool {
	kanjiTables := []*unicode.RangeTable{unicode.Ideographic}
	return containsChar(s, kanjiTables)
}

// countChar returns how many runes of s belong to any of the given range
// tables. An empty string yields 0.
func countChar(s string, rangeTable []*unicode.RangeTable) int {
	var total int
	for _, ch := range s {
		if !unicode.In(ch, rangeTable...) {
			continue
		}
		total++
	}
	return total
}

// CountHiragana returns how many runes of s are in the unicode.Hiragana
// table.
func CountHiragana(s string) int {
	hiraganaTables := []*unicode.RangeTable{unicode.Hiragana}
	return countChar(s, hiraganaTables)
}

// CountKatakana returns how many runes of s are in the unicode.Katakana
// table.
func CountKatakana(s string) int {
	katakanaTables := []*unicode.RangeTable{unicode.Katakana}
	return countChar(s, katakanaTables)
}

// CountKana returns how many runes of s are kana, i.e. members of either the
// unicode.Hiragana or the unicode.Katakana table.
func CountKana(s string) int {
	kanaTables := []*unicode.RangeTable{unicode.Hiragana, unicode.Katakana}
	return countChar(s, kanaTables)
}

// CountKanji returns how many runes of s are in the unicode.Ideographic
// table.
func CountKanji(s string) int {
	kanjiTables := []*unicode.RangeTable{unicode.Ideographic}
	return countChar(s, kanjiTables)
}

// CountRomaji returns how many runes of s are in the unicode.Latin table.
func CountRomaji(s string) int {
	latinTables := []*unicode.RangeTable{unicode.Latin}
	return countChar(s, latinTables)
}
51 changes: 51 additions & 0 deletions kana_bench_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
package kana

import "testing"

// BenchmarkKanaToRomaji times KanaToRomaji on a four-character hiragana word.
func BenchmarkKanaToRomaji(b *testing.B) {
	const input = "ひらがな"
	for n := 0; n < b.N; n++ {
		KanaToRomaji(input)
	}
}

// BenchmarkRomajiToHiragana times RomajiToHiragana on a short romaji word.
func BenchmarkRomajiToHiragana(b *testing.B) {
	const input = "hiragana"
	for n := 0; n < b.N; n++ {
		RomajiToHiragana(input)
	}
}

// BenchmarkRomajiToKatakana times RomajiToKatakana on a short romaji word.
func BenchmarkRomajiToKatakana(b *testing.B) {
	const input = "katakana"
	for n := 0; n < b.N; n++ {
		RomajiToKatakana(input)
	}
}

// BenchmarkHiraganaToKatakana times HiraganaToKatakana on a four-character
// hiragana word.
func BenchmarkHiraganaToKatakana(b *testing.B) {
	const input = "ひらがな"
	for n := 0; n < b.N; n++ {
		HiraganaToKatakana(input)
	}
}

// BenchmarkKatakanaToHiragana times KatakanaToHiragana on a four-character
// katakana word.
func BenchmarkKatakanaToHiragana(b *testing.B) {
	const input = "カタカナ"
	for n := 0; n < b.N; n++ {
		KatakanaToHiragana(input)
	}
}

// BenchmarkIsKana times IsKana on a four-character hiragana word.
func BenchmarkIsKana(b *testing.B) {
	const input = "ひらがな"
	for n := 0; n < b.N; n++ {
		IsKana(input)
	}
}

// BenchmarkContainsKana times ContainsKana on a string with hiragana between
// Latin words.
func BenchmarkContainsKana(b *testing.B) {
	const input = "hello ひらがな world"
	for n := 0; n < b.N; n++ {
		ContainsKana(input)
	}
}

// BenchmarkCountKana times CountKana on a string mixing Latin words,
// hiragana and katakana.
func BenchmarkCountKana(b *testing.B) {
	const input = "hello ひらがな world カタカナ"
	for n := 0; n < b.N; n++ {
		CountKana(input)
	}
}
Loading