From 087cc0340326769358310aa0558ffe31dc30f7c6 Mon Sep 17 00:00:00 2001 From: Malo <57839069+MDLC01@users.noreply.github.com> Date: Thu, 19 Feb 2026 00:18:02 +0100 Subject: [PATCH 1/2] Initial numeral system implementation --- Cargo.lock | 214 +++++++++---- Cargo.toml | 8 +- src/lib.rs | 3 + src/numeral_systems.rs | 694 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 858 insertions(+), 61 deletions(-) create mode 100644 src/numeral_systems.rs diff --git a/Cargo.lock b/Cargo.lock index 6802aad..4cbc702 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8,6 +8,12 @@ version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + [[package]] name = "base64" version = "0.22.1" @@ -22,23 +28,43 @@ checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" [[package]] name = "cc" -version = "1.2.31" +version = "1.2.56" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3a42d84bb6b69d3a8b3eaacf0d88f179e1929695e1ad012b6cf64d9caaa5fd2" +checksum = "aebf35691d1bfb0ac386a69bac2fde4dd276fb618cf8bf4f5318fe285e821bb2" dependencies = [ + "find-msvc-tools", "shlex", ] [[package]] name = "cfg-if" -version = "1.0.1" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "chinese-number" +version = "0.7.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268" +checksum = "3e964125508474a83c95eb935697abbeb446ff4e9d62c71ce880e3986d1c606b" +dependencies = [ + "chinese-variant", + "enum-ordinalize", + "num-bigint", + "num-traits", 
+] + +[[package]] +name = "chinese-variant" +version = "1.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "58b52a9840ffff5d4d0058ae529fa066a75e794e3125546acfc61c23ad755e49" [[package]] name = "codex" version = "0.2.0" dependencies = [ + "chinese-number", "ureq", ] @@ -52,26 +78,46 @@ dependencies = [ ] [[package]] -name = "flate2" -version = "1.1.2" +name = "enum-ordinalize" +version = "4.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a3d7db9596fecd151c5f638c0ee5d5bd487b6e0ea232e5dc96d5250f6f94b1d" +checksum = "4a1091a7bb1f8f2c4b28f1fe2cef4980ca2d410a3d727d67ecc3178c9b0800f0" dependencies = [ - "crc32fast", - "miniz_oxide", + "enum-ordinalize-derive", +] + +[[package]] +name = "enum-ordinalize-derive" +version = "4.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ca9601fb2d62598ee17836250842873a413586e5d7ed88b356e38ddbb0ec631" +dependencies = [ + "proc-macro2", + "quote", + "syn", ] [[package]] -name = "fnv" -version = "1.0.7" +name = "find-msvc-tools" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" + +[[package]] +name = "flate2" +version = "1.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" +checksum = "843fba2746e448b37e26a819579957415c8cef339bf08564fe8b7ddbd959573c" +dependencies = [ + "crc32fast", + "miniz_oxide", +] [[package]] name = "getrandom" -version = "0.2.16" +version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" +checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0" dependencies = [ "cfg-if", "libc", @@ -80,12 +126,11 @@ dependencies = [ [[package]] name = "http" -version = "1.3.1" +version = "1.4.0" 
source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4a85d31aea989eead29a3aaf9e1115a180df8282431156e533de47660892565" +checksum = "e3ba2a386d7f85a81f119ad7498ebe444d2e22c2af0b86b069416ace48b3311a" dependencies = [ "bytes", - "fnv", "itoa", ] @@ -97,21 +142,21 @@ checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87" [[package]] name = "itoa" -version = "1.0.15" +version = "1.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" +checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" [[package]] name = "libc" -version = "0.2.174" +version = "0.2.182" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1171693293099992e19cddea4e8b849964e9846f4acee11b3948bcc337be8776" +checksum = "6800badb6cb2082ffd7b6a67e6125bb39f18782f793520caee8cb8846be06112" [[package]] name = "log" -version = "0.4.27" +version = "0.4.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" +checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" [[package]] name = "miniz_oxide" @@ -120,6 +165,35 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" dependencies = [ "adler2", + "simd-adler32", +] + +[[package]] +name = "num-bigint" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" +dependencies = [ + "num-integer", + "num-traits", +] + +[[package]] +name = "num-integer" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +dependencies = [ + "num-traits", +] + +[[package]] +name = 
"num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", ] [[package]] @@ -130,9 +204,27 @@ checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" [[package]] name = "percent-encoding" -version = "2.3.1" +version = "2.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" +checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" + +[[package]] +name = "proc-macro2" +version = "1.0.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21b2ebcf727b7760c461f091f9f0f539b77b8e87f2fd88131e7f1b433b3cece4" +dependencies = [ + "proc-macro2", +] [[package]] name = "ring" @@ -150,9 +242,9 @@ dependencies = [ [[package]] name = "rustls" -version = "0.23.31" +version = "0.23.36" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0ebcbd2f03de0fc1122ad9bb24b127a5a6cd51d72604a3f3c50ac459762b6cc" +checksum = "c665f33d38cea657d9614f766881e4d510e0eda4239891eea56b4cadcf01801b" dependencies = [ "log", "once_cell", @@ -163,29 +255,20 @@ dependencies = [ "zeroize", ] -[[package]] -name = "rustls-pemfile" -version = "2.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dce314e5fee3f39953d46bb63bb8a46d40c2f8fb7cc5a3b6cab2bde9721d6e50" -dependencies = [ - "rustls-pki-types", -] - [[package]] name = "rustls-pki-types" -version = "1.12.0" +version = "1.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"229a4a4c221013e7e1f1a043678c5cc39fe5171437c88fb47151a21e6f5b5c79" +checksum = "be040f8b0a225e40375822a563fa9524378b9d63112f53e19ffff34df5d33fdd" dependencies = [ "zeroize", ] [[package]] name = "rustls-webpki" -version = "0.103.4" +version = "0.103.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a17884ae0c1b773f1ccd2bd4a8c72f16da897310a98b0e84bf349ad5ead92fc" +checksum = "d7df23109aa6c1567d1c575b9952556388da57401e4ace1d15f79eedad0d8f53" dependencies = [ "ring", "rustls-pki-types", @@ -198,12 +281,35 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" +[[package]] +name = "simd-adler32" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2" + [[package]] name = "subtle" version = "2.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" +[[package]] +name = "syn" +version = "2.0.116" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3df424c70518695237746f84cede799c9c58fcb37450d7b23716568cc8bc69cb" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "unicode-ident" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" + [[package]] name = "untrusted" version = "0.9.0" @@ -212,27 +318,26 @@ checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" [[package]] name = "ureq" -version = "3.0.12" +version = "3.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f0fde9bc91026e381155f8c67cb354bcd35260b2f4a29bcc84639f762760c39" +checksum = "fdc97a28575b85cfedf2a7e7d3cc64b3e11bd8ac766666318003abbacc7a21fc" 
dependencies = [ "base64", "flate2", "log", "percent-encoding", "rustls", - "rustls-pemfile", "rustls-pki-types", "ureq-proto", "utf-8", - "webpki-roots 0.26.11", + "webpki-roots", ] [[package]] name = "ureq-proto" -version = "0.4.2" +version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "59db78ad1923f2b1be62b6da81fe80b173605ca0d57f85da2e005382adf693f7" +checksum = "d81f9efa9df032be5934a46a068815a10a042b494b6a58cb0a1a97bb5467ed6f" dependencies = [ "base64", "http", @@ -254,18 +359,9 @@ checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" [[package]] name = "webpki-roots" -version = "0.26.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "521bc38abb08001b01866da9f51eb7c5d647a19260e00054a8c7fd5f9e57f7a9" -dependencies = [ - "webpki-roots 1.0.2", -] - -[[package]] -name = "webpki-roots" -version = "1.0.2" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e8983c3ab33d6fb807cfcdad2491c4ea8cbc8ed839181c7dfd9c67c83e261b2" +checksum = "22cfaf3c063993ff62e73cb4311efde4db1efb31ab78a3e5c457939ad5cc0bed" dependencies = [ "rustls-pki-types", ] @@ -345,6 +441,6 @@ checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" [[package]] name = "zeroize" -version = "1.8.1" +version = "1.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde" +checksum = "b97154e67e32c85465826e8bcc1c59429aaaf107c1e4a9e53c8d8ccd5eff88d0" diff --git a/Cargo.toml b/Cargo.toml index e9dc852..6fbd3f0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,7 +2,7 @@ name = "codex" version = "0.2.0" authors = ["The Typst Project Developers"] -edition = "2021" +edition = "2024" description = "Human-friendly notation for Unicode symbols." 
repository = "https://github.com/typst/codex" readme = "README.md" @@ -11,9 +11,13 @@ categories = ["encoding", "text-processing"] keywords = ["unicode", "symbols"] [features] -default = ["styling"] +default = ["numeral-systems", "styling"] +numeral-systems = ["chinese-number"] styling = [] _test-unicode-conformance = ["ureq"] +[dependencies] +chinese-number = { version = "0.7.7", default-features = false, features = ["number-to-chinese"], optional = true } + [build-dependencies] ureq = { version = "3.0.12", optional = true } diff --git a/src/lib.rs b/src/lib.rs index 38d7f51..ac7ee78 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -11,6 +11,9 @@ pub use self::shared::ModifierSet; mod shared; +#[cfg(feature = "numeral-systems")] +pub mod numeral_systems; + #[cfg(feature = "styling")] pub mod styling; diff --git a/src/numeral_systems.rs b/src/numeral_systems.rs new file mode 100644 index 0000000..df1e0d1 --- /dev/null +++ b/src/numeral_systems.rs @@ -0,0 +1,694 @@ +//! Various ways of displaying non-negative integers. + +use chinese_number::{ChineseCase, ChineseVariant, from_u64_to_chinese_ten_thousand}; +use std::fmt::{Display, Formatter}; + +/// Represents a numeral system of one of multiple predefined kinds. +#[non_exhaustive] +#[derive(Debug, Clone, Copy)] +pub enum NumeralSystem<'a> { + /// A big-endian + /// [positional notation](https://en.wikipedia.org/wiki/Positional_notation) + /// system. + /// + /// ## Representable Numbers + /// + /// A numeral system of this kind can represent any non-negative integer. + /// + /// ## Example + /// + /// With the digits `['0', '1', '2']`, we obtain the ternary numeral system: + /// + /// | Number | Representation | + /// |--------|----------------| + /// | 0 | 0 | + /// | 1 | 1 | + /// | 2 | 2 | + /// | 3 | 10 | + /// | 4 | 11 | + /// | 5 | 12 | + /// | 6 | 20 | + Positional(&'a [char]), + + /// A big-endian + /// [bijective numeration](https://en.wikipedia.org/wiki/Bijective_numeration) + /// system.
This is similar to positional notation, but without a digit for + /// zero. + /// + /// ## Representable Numbers + /// + /// A numeral system of this kind can represent any positive integer. + /// + /// ## Example + /// + /// With the digits `['A', 'B', 'C']`, we obtain a system similar to one + /// commonly used to number columns in spreadsheet software: + /// + /// | Number | Representation | + /// |--------|----------------| + /// | 1 | A | + /// | 2 | B | + /// | 3 | C | + /// | 4 | AA | + /// | 5 | AB | + /// | 6 | AC | + /// | 7 | BA | + Bijective(&'a [char]), + + /// An additive + /// [sign-value notation](https://en.wikipedia.org/wiki/Sign-value_notation) + /// system. + /// + /// The numerals must be specified by decreasing value. + /// + /// ## Representable Numbers + /// + /// A numeral system of this kind can represent any positive integer. + /// + /// ## Example + /// + /// With the numerals `[("V", 5), ("IV", 4), ("I", 1)]`, we obtain the start + /// of the Roman numeral system: + /// + /// | Number | Representation | + /// |--------|----------------| + /// | 1 | I | + /// | 2 | II | + /// | 3 | III | + /// | 4 | IV | + /// | 5 | V | + /// | 6 | VI | + /// | 7 | VII | + Additive(&'a [(&'a str, u64)]), + + /// A system that uses repeating symbols. + /// + /// ## Representable Numbers + /// + /// A numeral system of this kind can represent any positive integer. + /// + /// ## Example + /// + /// With the symbols `['A', 'B', 'C']`, we obtain the following + /// representations: + /// + /// | Number | Representation | + /// |--------|----------------| + /// | 1 | A | + /// | 2 | B | + /// | 3 | C | + /// | 4 | AA | + /// | 5 | BB | + /// | 6 | CC | + /// | 7 | AAA | + Symbolic(&'a [char]), + + /// A system that uses a fixed set of symbols to represent the first + /// non-negative integers. + /// + /// ## Representable Numbers + /// + /// Only non-negative integers below the symbol count are representable.
+ /// + /// ## Example + /// + /// With the symbols `['A', 'B', 'C']`, we obtain the following + /// representations: + /// + /// | Number | Representation | + /// |--------|----------------| + /// | 0 | A | + /// | 1 | B | + /// | 2 | C | + ZeroableFixed(&'a [char]), + + /// A system that uses a fixed set of symbols to represent the first + /// positive integers. + /// + /// ## Representable Numbers + /// + /// Only positive integers up to the symbol count are representable. + /// + /// ## Example + /// + /// With the symbols `['A', 'B', 'C']`, we obtain the following + /// representations: + /// + /// | Number | Representation | + /// |--------|----------------| + /// | 1 | A | + /// | 2 | B | + /// | 3 | C | + NonZeroableFixed(&'a [char]), + + /// A Chinese numeral system. + /// + /// ## Representable Numbers + /// + /// Chinese numeral systems can represent any non-negative integer. + /// + /// ## Example + /// + /// With [`ChineseVariant::Simple`] and [`ChineseCase::Lower`], we + /// obtain the following representations: + /// + /// | Number | Representation | + /// |--------|----------------| + /// | 0 | 零 | + /// | 1 | 一 | + /// | 2 | 二 | + /// | 3 | 三 | + /// | 4 | 四 | + /// | 5 | 五 | + /// | 6 | 六 | + Chinese(ChineseVariant, ChineseCase), +} + +impl<'a> NumeralSystem<'a> { + pub const fn apply( + &'a self, + number: u64, + ) -> Result<RepresentedNumber<'a>, RepresentationError> { + match self { + Self::Positional(_) | Self::Chinese(_, _) => {} + Self::Bijective(_) | Self::Symbolic(_) => { + if number == 0 { + return Err(RepresentationError::Zero); + } + } + Self::Additive(numerals) => { + if number == 0 && !matches!(numerals.last(), Some((_, 0))) { + return Err(RepresentationError::Zero); + } + } + Self::ZeroableFixed(symbols) => { + if number >= symbols.len() as u64 { + return Err(RepresentationError::TooLarge); + } + } + Self::NonZeroableFixed(symbols) => { + if number == 0 { + return Err(RepresentationError::Zero); + } + if number > symbols.len() as u64 { + return
Err(RepresentationError::TooLarge); + } + } + } + Ok(RepresentedNumber { system: self, number }) + } + + /// Base-ten + /// [Arabic numerals](https://en.wikipedia.org/wiki/Arabic_numerals): 0, + /// 1, 2, 3, ... + pub const ARABIC: Self = + NumeralSystem::Positional(&['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']); + + /// Circled Arabic numerals up to fifty: ⓪, ①, ②, ... + pub const CIRCLED_ARABIC: Self = NumeralSystem::ZeroableFixed(&[ + '⓪', '①', '②', '③', '④', '⑤', '⑥', '⑦', '⑧', '⑨', '⑩', '⑪', '⑫', '⑬', '⑭', '⑮', + '⑯', '⑰', '⑱', '⑲', '⑳', '㉑', '㉒', '㉓', '㉔', '㉕', '㉖', '㉗', '㉘', '㉙', + '㉚', '㉛', '㉜', '㉝', '㉞', '㉟', '㊱', '㊲', '㊳', '㊴', '㊵', '㊶', '㊷', + '㊸', '㊹', '㊺', '㊻', '㊼', '㊽', '㊾', '㊿', + ]); + + /// Double-circled Arabic numerals up to ten: ⓵, ⓶, ⓷, ... + pub const DOUBLE_CIRCLED_ARABIC: Self = NumeralSystem::NonZeroableFixed(&[ + '⓵', '⓶', '⓷', '⓸', '⓹', '⓺', '⓻', '⓼', '⓽', '⓾', + ]); + + /// Lowercase + /// [Latin letters](https://en.wikipedia.org/wiki/Latin_alphabet): a, b, + /// c, ..., y, z, aa, ab, ... + pub const LOWER_LATIN: Self = NumeralSystem::Bijective(&[ + 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', + 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', + ]); + + /// Uppercase + /// [Latin letters](https://en.wikipedia.org/wiki/Latin_alphabet): A, B, + /// C, ..., Y, Z, AA, AB, ... + pub const UPPER_LATIN: Self = NumeralSystem::Bijective(&[ + 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', + 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', + ]); + + /// Lowercase + /// [Roman numerals](https://en.wikipedia.org/wiki/Roman_numerals): i, + /// ii, iii, ... 
+ pub const LOWER_ROMAN: Self = NumeralSystem::Additive(&[ + ("m̅", 1000000), + ("d̅", 500000), + ("c̅", 100000), + ("l̅", 50000), + ("x̅", 10000), + ("v̅", 5000), + ("i̅v̅", 4000), + ("m", 1000), + ("cm", 900), + ("d", 500), + ("cd", 400), + ("c", 100), + ("xc", 90), + ("l", 50), + ("xl", 40), + ("x", 10), + ("ix", 9), + ("v", 5), + ("iv", 4), + ("i", 1), + ("n", 0), + ]); + + /// Uppercase + /// [Roman numerals](https://en.wikipedia.org/wiki/Roman_numerals): I, + /// II, III, ... + pub const UPPER_ROMAN: Self = NumeralSystem::Additive(&[ + ("M̅", 1000000), + ("D̅", 500000), + ("C̅", 100000), + ("L̅", 50000), + ("X̅", 10000), + ("V̅", 5000), + ("I̅V̅", 4000), + ("M", 1000), + ("CM", 900), + ("D", 500), + ("CD", 400), + ("C", 100), + ("XC", 90), + ("L", 50), + ("XL", 40), + ("X", 10), + ("IX", 9), + ("V", 5), + ("IV", 4), + ("I", 1), + ("N", 0), + ]); + + /// Lowercase + /// [Greek numerals](https://en.wikipedia.org/wiki/Greek_numerals): α, + /// β, γ, ... + pub const LOWER_GREEK: Self = NumeralSystem::Additive(&[ + ("͵θ", 9000), + ("͵η", 8000), + ("͵ζ", 7000), + ("͵ϛ", 6000), + ("͵ε", 5000), + ("͵δ", 4000), + ("͵γ", 3000), + ("͵β", 2000), + ("͵α", 1000), + ("ϡ", 900), + ("ω", 800), + ("ψ", 700), + ("χ", 600), + ("φ", 500), + ("υ", 400), + ("τ", 300), + ("σ", 200), + ("ρ", 100), + ("ϟ", 90), + ("π", 80), + ("ο", 70), + ("ξ", 60), + ("ν", 50), + ("μ", 40), + ("λ", 30), + ("κ", 20), + ("ι", 10), + ("θ", 9), + ("η", 8), + ("ζ", 7), + ("ϛ", 6), + ("ε", 5), + ("δ", 4), + ("γ", 3), + ("β", 2), + ("α", 1), + ("𐆊", 0), + ]); + + /// Uppercase + /// [Greek numerals](https://en.wikipedia.org/wiki/Greek_numerals): Α, + /// Β, Γ, ... 
+ pub const UPPER_GREEK: Self = NumeralSystem::Additive(&[ + ("͵Θ", 9000), + ("͵Η", 8000), + ("͵Ζ", 7000), + ("͵Ϛ", 6000), + ("͵Ε", 5000), + ("͵Δ", 4000), + ("͵Γ", 3000), + ("͵Β", 2000), + ("͵Α", 1000), + ("Ϡ", 900), + ("Ω", 800), + ("Ψ", 700), + ("Χ", 600), + ("Φ", 500), + ("Υ", 400), + ("Τ", 300), + ("Σ", 200), + ("Ρ", 100), + ("Ϟ", 90), + ("Π", 80), + ("Ο", 70), + ("Ξ", 60), + ("Ν", 50), + ("Μ", 40), + ("Λ", 30), + ("Κ", 20), + ("Ι", 10), + ("Θ", 9), + ("Η", 8), + ("Ζ", 7), + ("Ϛ", 6), + ("Ε", 5), + ("Δ", 4), + ("Γ", 3), + ("Β", 2), + ("Α", 1), + ("𐆊", 0), + ]); + + /// Hebrew numerals, without Geresh/Gershayim punctuation. + pub const HEBREW: Self = NumeralSystem::Additive(&[ + ("ת", 400), + ("ש", 300), + ("ר", 200), + ("ק", 100), + ("צ", 90), + ("פ", 80), + ("ע", 70), + ("ס", 60), + ("נ", 50), + ("מ", 40), + ("ל", 30), + ("כ", 20), + ("יט", 19), + ("יח", 18), + ("יז", 17), + ("טז", 16), + ("טו", 15), + ("י", 10), + ("ט", 9), + ("ח", 8), + ("ז", 7), + ("ו", 6), + ("ה", 5), + ("ד", 4), + ("ג", 3), + ("ב", 2), + ("א", 1), + ("-", 0), + ]); + + /// Simplified Chinese standard numerals. + pub const LOWER_SIMPLIFIED_CHINESE: Self = + NumeralSystem::Chinese(ChineseVariant::Simple, ChineseCase::Lower); + + /// Simplified Chinese "banknote" numerals. + pub const UPPER_SIMPLIFIED_CHINESE: Self = + NumeralSystem::Chinese(ChineseVariant::Simple, ChineseCase::Upper); + + /// Traditional Chinese standard numerals. + pub const LOWER_TRADITIONAL_CHINESE: Self = + NumeralSystem::Chinese(ChineseVariant::Traditional, ChineseCase::Lower); + + /// Traditional Chinese "banknote" numerals. + pub const UPPER_TRADITIONAL_CHINESE: Self = + NumeralSystem::Chinese(ChineseVariant::Traditional, ChineseCase::Upper); + + /// Hiragana in the gojūon order. Includes n but excludes wi and we.
+ pub const HIRAGANA_AIUEO: Self = NumeralSystem::Bijective(&[ + 'あ', 'い', 'う', 'え', 'お', 'か', 'き', 'く', 'け', 'こ', 'さ', 'し', 'す', + 'せ', 'そ', 'た', 'ち', 'つ', 'て', 'と', 'な', 'に', 'ぬ', 'ね', 'の', 'は', + 'ひ', 'ふ', 'へ', 'ほ', 'ま', 'み', 'む', 'め', 'も', 'や', 'ゆ', 'よ', 'ら', + 'り', 'る', 'れ', 'ろ', 'わ', 'を', 'ん', + ]); + + /// Hiragana in the iroha order. Includes wi and we but excludes n. + pub const HIRAGANA_IROHA: Self = NumeralSystem::Bijective(&[ + 'い', 'ろ', 'は', 'に', 'ほ', 'へ', 'と', 'ち', 'り', 'ぬ', 'る', 'を', 'わ', + 'か', 'よ', 'た', 'れ', 'そ', 'つ', 'ね', 'な', 'ら', 'む', 'う', 'ゐ', 'の', + 'お', 'く', 'や', 'ま', 'け', 'ふ', 'こ', 'え', 'て', 'あ', 'さ', 'き', 'ゆ', + 'め', 'み', 'し', 'ゑ', 'ひ', 'も', 'せ', 'す', + ]); + + /// Katakana in the gojūon order. Includes n but excludes wi and we. + pub const KATAKANA_AIUEO: Self = NumeralSystem::Bijective(&[ + 'ア', 'イ', 'ウ', 'エ', 'オ', 'カ', 'キ', 'ク', 'ケ', 'コ', 'サ', 'シ', 'ス', + 'セ', 'ソ', 'タ', 'チ', 'ツ', 'テ', 'ト', 'ナ', 'ニ', 'ヌ', 'ネ', 'ノ', 'ハ', + 'ヒ', 'フ', 'ヘ', 'ホ', 'マ', 'ミ', 'ム', 'メ', 'モ', 'ヤ', 'ユ', 'ヨ', 'ラ', + 'リ', 'ル', 'レ', 'ロ', 'ワ', 'ヲ', 'ン', + ]); + + /// Katakana in the iroha order. Includes wi and we but excludes n. + pub const KATAKANA_IROHA: Self = NumeralSystem::Bijective(&[ + 'イ', 'ロ', 'ハ', 'ニ', 'ホ', 'ヘ', 'ト', 'チ', 'リ', 'ヌ', 'ル', 'ヲ', 'ワ', + 'カ', 'ヨ', 'タ', 'レ', 'ソ', 'ツ', 'ネ', 'ナ', 'ラ', 'ム', 'ウ', 'ヰ', 'ノ', + 'オ', 'ク', 'ヤ', 'マ', 'ケ', 'フ', 'コ', 'エ', 'テ', 'ア', 'サ', 'キ', 'ユ', + 'メ', 'ミ', 'シ', 'ヱ', 'ヒ', 'モ', 'セ', 'ス', + ]); + + /// Korean jamo: ㄱ, ㄴ, ㄷ, ... + pub const KOREAN_JAMO: Self = NumeralSystem::Bijective(&[ + 'ㄱ', 'ㄴ', 'ㄷ', 'ㄹ', 'ㅁ', 'ㅂ', 'ㅅ', 'ㅇ', 'ㅈ', 'ㅊ', 'ㅋ', 'ㅌ', 'ㅍ', + 'ㅎ', + ]); + + /// Korean syllables: 가, 나, 다, ... + pub const KOREAN_SYLLABLE: Self = NumeralSystem::Bijective(&[ + '가', '나', '다', '라', '마', '바', '사', '아', '자', '차', '카', '타', '파', + '하', + ]); + + /// Eastern Arabic numerals, used in some Arabic-speaking countries. 
+ pub const EASTERN_ARABIC: Self = + NumeralSystem::Positional(&['٠', '١', '٢', '٣', '٤', '٥', '٦', '٧', '٨', '٩']); + + /// The variant of Eastern Arabic numerals used in Persian and Urdu. + pub const EASTERN_ARABIC_PERSIAN: Self = + NumeralSystem::Positional(&['۰', '۱', '۲', '۳', '۴', '۵', '۶', '۷', '۸', '۹']); + + /// Devanagari numerals. + pub const DEVANAGARI_NUMBER: Self = + NumeralSystem::Positional(&['०', '१', '२', '३', '४', '५', '६', '७', '८', '९']); + + /// Bengali numerals. + pub const BENGALI_NUMBER: Self = + NumeralSystem::Positional(&['০', '১', '২', '৩', '৪', '৫', '৬', '৭', '৮', '৯']); + + /// Bengali letters: ক, খ, গ, ..., কক, কখ, ... + pub const BENGALI_LETTER: Self = NumeralSystem::Bijective(&[ + 'ক', 'খ', 'গ', 'ঘ', 'ঙ', 'চ', 'ছ', 'জ', 'ঝ', 'ঞ', 'ট', 'ঠ', 'ড', 'ঢ', 'ণ', 'ত', + 'থ', 'দ', 'ধ', 'ন', 'প', 'ফ', 'ব', 'ভ', 'ম', 'য', 'র', 'ল', 'শ', 'ষ', 'স', 'হ', + ]); + + /// [Paragraph/note-like symbols](https://en.wikipedia.org/wiki/Note_(typography)#Numbering_and_symbols): + /// *, †, ‡, §, ¶, and ‖. + /// + /// Further items use repeated symbols. + pub const SYMBOL: Self = NumeralSystem::Symbolic(&['*', '†', '‡', '§', '¶', '‖']); +} + +/// A number, together with a numeral system in which it is representable. +/// +/// Notably, this type implements [`Display`] and is thus compatible with +/// [`format!()`]. +#[derive(Debug, Clone, Copy)] +pub struct RepresentedNumber<'a> { + /// Invariant: This system must be able to represent the number. + system: &'a NumeralSystem<'a>, + number: u64, +} + +impl<'a> Display for RepresentedNumber<'a> { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self.system { + NumeralSystem::Positional(digits) => { + let mut n = self.number; + + if n == 0 { + return write!(f, "{}", digits[0]); + } + + let radix = digits.len() as u64; + let size = n.ilog(radix) + 1; + // The place value of the most significant digit. For a number + // of size 1, the MSD's place is the ones place, hence `- 1`. 
+ let mut msd_place = radix.pow(size - 1); + for _ in 0..size { + let msd = n / msd_place; + write!(f, "{}", digits[msd as usize])?; + n -= msd * msd_place; + msd_place /= radix; + } + Ok(()) + } + + NumeralSystem::Bijective(digits) => { + let mut n = self.number as u128; // u128 avoids overflow below + + assert_ne!(n, 0); + + let radix = digits.len() as u128; + // Number of digits when representing `n` in this system. + // From https://en.wikipedia.org/wiki/Bijective_numeration#Properties_of_bijective_base-k_numerals. + let size = ((n + 1) * (radix - 1)).ilog(radix); + // Remove from `n` the number consisting of `size` ones in + // base-`radix`, and then print the result using the symbols as + // a positional numeral system. + n -= (radix.pow(size) - 1) / (radix - 1); + // The place value of the most significant digit. For a number + // of size 1, the MSD's place is the ones place, hence `- 1`. + let mut msd_place = radix.pow(size - 1); + for _ in 0..size { + let msd = n / msd_place; + write!(f, "{}", digits[msd as usize])?; + n -= msd * msd_place; + msd_place /= radix; + } + Ok(()) + } + + NumeralSystem::Additive(numerals) => { + let mut n = self.number; + + if n == 0 { + if let Some(&(numeral, 0)) = numerals.last() { + return write!(f, "{}", numeral); + } + unreachable!() + } + + // Greedily add any symbol that fits. + for (numeral, weight) in *numerals { + if *weight == 0 || *weight > n { + continue; + } + let reps = n / weight; + for _ in 0..reps { + write!(f, "{}", numeral)? + } + + n -= weight * reps; + } + Ok(()) + } + NumeralSystem::Symbolic(symbols) => { + let n = self.number; + assert_ne!(n, 0); + let symbol_count = symbols.len() as u64; + for _ in 0..n.div_ceil(symbol_count) { + write!(f, "{}", symbols[((n - 1) % symbol_count) as usize])?
+ } + Ok(()) + } + + NumeralSystem::ZeroableFixed(symbols) => { + write!(f, "{}", symbols[self.number as usize]) + } + + NumeralSystem::NonZeroableFixed(symbols) => { + write!(f, "{}", symbols[(self.number - 1) as usize]) + } + + NumeralSystem::Chinese(variant, case) => write!( + f, + "{}", + from_u64_to_chinese_ten_thousand(*variant, *case, self.number), + ), + } + } +} + +/// A reason why a number cannot be represented in a numeral system. +#[derive(Debug, PartialEq, Eq, Clone, Copy, Hash)] +pub enum RepresentationError { + /// Zero cannot be represented in the numeral system. + Zero, + /// The number is too large for the numeral system. + TooLarge, +} + +#[cfg(test)] +mod tests { + use super::NumeralSystem; + + #[test] + fn test_arabic_numerals() { + for n in 0..=9999 { + assert_eq!(NumeralSystem::ARABIC.apply(n).unwrap().to_string(), n.to_string(),) + } + } + + #[test] + fn test_latin() { + let mut n = 1; + for c1 in 'a'..='z' { + assert_eq!( + NumeralSystem::LOWER_LATIN.apply(n).unwrap().to_string(), + format!("{c1}"), + ); + assert_eq!( + NumeralSystem::UPPER_LATIN.apply(n).unwrap().to_string(), + format!("{c1}").to_uppercase(), + ); + n += 1 + } + for c2 in 'a'..='z' { + for c1 in 'a'..='z' { + assert_eq!( + NumeralSystem::LOWER_LATIN.apply(n).unwrap().to_string(), + format!("{c2}{c1}"), + ); + assert_eq!( + NumeralSystem::UPPER_LATIN.apply(n).unwrap().to_string(), + format!("{c2}{c1}").to_uppercase(), + ); + n += 1 + } + } + for c3 in 'a'..='z' { + for c2 in 'a'..='z' { + for c1 in 'a'..='z' { + assert_eq!( + NumeralSystem::LOWER_LATIN.apply(n).unwrap().to_string(), + format!("{c3}{c2}{c1}"), + ); + assert_eq!( + NumeralSystem::UPPER_LATIN.apply(n).unwrap().to_string(), + format!("{c3}{c2}{c1}").to_uppercase(), + ); + n += 1 + } + } + } + } + + #[test] + fn test_roman() { + for (n, expect) in [ + "n", "i", "ii", "iii", "iv", "v", "vi", "vii", "viii", "ix", "x", "xi", + "xii", "xiii", "xiv", "xv", "xvi", "xvii", "xviii", "xix", "xx", "xxi", + "xxii", 
"xxiii", "xxiv", "xxv", "xxvi", "xxvii", "xxviii", "xxix", "xxx", + "xxxi", "xxxii", "xxxiii", "xxxiv", "xxxv", "xxxvi", "xxxvii", "xxxviii", + "xxxix", "xl", "xli", "xlii", "xliii", "xliv", "xlv", "xlvi", + ] + .iter() + .enumerate() + { + assert_eq!( + &NumeralSystem::LOWER_ROMAN.apply(n as u64).unwrap().to_string(), + expect, + ); + assert_eq!( + NumeralSystem::UPPER_ROMAN.apply(n as u64).unwrap().to_string(), + expect.to_uppercase(), + ); + } + } +} From 45fbbc336767399c4949c7f55b1942f5e84a7aed Mon Sep 17 00:00:00 2001 From: Malo <57839069+MDLC01@users.noreply.github.com> Date: Thu, 19 Feb 2026 11:30:35 +0100 Subject: [PATCH 2/2] Improve documentation --- src/numeral_systems.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/numeral_systems.rs b/src/numeral_systems.rs index df1e0d1..34f4541 100644 --- a/src/numeral_systems.rs +++ b/src/numeral_systems.rs @@ -165,6 +165,11 @@ pub enum NumeralSystem<'a> { } impl<'a> NumeralSystem<'a> { + /// Tries to represent a number in this numeral system. + /// + /// If `Ok(r)` is returned, `r` is a value of a type that implements + /// [`Display`] by printing the number as represented in this numeral + /// system. pub const fn apply( &'a self, number: u64, @@ -498,6 +503,8 @@ impl<'a> NumeralSystem<'a> { /// /// Notably, this type implements [`Display`] and is thus compatible with /// [`format!()`]. +/// +/// Values of this type are constructed by [`NumeralSystem::apply`]. #[derive(Debug, Clone, Copy)] pub struct RepresentedNumber<'a> { /// Invariant: This system must be able to represent the number.