From f2aa051b0558da2a0e0569cbfba25918adb9a7c6 Mon Sep 17 00:00:00 2001
From: Jonas Kruckenberg <118265418+CrabNejonas@users.noreply.github.com>
Date: Wed, 1 Nov 2023 15:29:43 +0100
Subject: [PATCH 01/28] feat: no-std
---
.idea/fst.iml | 14 ++++
.idea/modules.xml | 8 +++
.idea/vcs.xml | 6 ++
.idea/workspace.xml | 126 +++++++++++++++++++++++++++++++++++
Cargo.toml | 4 +-
src/automaton/levenshtein.rs | 23 +++++--
src/bytes.rs | 7 +-
src/error.rs | 24 ++++++-
src/lib.rs | 13 +++-
src/map.rs | 57 +++++++++++++++-
src/raw/build.rs | 31 +++++++--
src/raw/counting_writer.rs | 3 +
src/raw/error.rs | 30 ++++++++-
src/raw/mod.rs | 52 +++++++++++++--
src/raw/node.rs | 16 ++++-
src/raw/ops.rs | 28 +++++++-
src/raw/registry.rs | 11 ++-
src/raw/registry_minimal.rs | 6 +-
src/set.rs | 47 ++++++++++++-
19 files changed, 468 insertions(+), 38 deletions(-)
create mode 100644 .idea/fst.iml
create mode 100644 .idea/modules.xml
create mode 100644 .idea/vcs.xml
create mode 100644 .idea/workspace.xml
diff --git a/.idea/fst.iml b/.idea/fst.iml
new file mode 100644
index 00000000..e4ade7cf
--- /dev/null
+++ b/.idea/fst.iml
@@ -0,0 +1,14 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
new file mode 100644
index 00000000..46ed0723
--- /dev/null
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 00000000..35eb1ddf
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/workspace.xml b/.idea/workspace.xml
new file mode 100644
index 00000000..613a07ed
--- /dev/null
+++ b/.idea/workspace.xml
@@ -0,0 +1,126 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ {
+ "associatedIndex": 6
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1698689393828
+
+
+ 1698689393828
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/Cargo.toml b/Cargo.toml
index ca497485..02d51418 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -19,8 +19,10 @@ members = ["bench", "fst-bin"]
exclude = ["fst-levenshtein", "fst-regex"]
[features]
-default = []
+default = ["std"]
levenshtein = ["utf8-ranges"]
+std = ["alloc"]
+alloc = []
[patch.crates-io]
fst = { path = "." }
diff --git a/src/automaton/levenshtein.rs b/src/automaton/levenshtein.rs
index 4e2c2390..63b0dbf0 100644
--- a/src/automaton/levenshtein.rs
+++ b/src/automaton/levenshtein.rs
@@ -1,7 +1,9 @@
-use std::cmp;
-use std::collections::hash_map::Entry;
-use std::collections::{HashMap, HashSet};
-use std::fmt;
+use core::cmp;
+use core::fmt;
+#[cfg(feature = "std")] // HashMap/HashSet exist only in `std`; `alloc::collections` has no `hash_map`
+use std::collections::hash_map::Entry;
+#[cfg(feature = "std")]
+use std::collections::{HashMap, HashSet};
use utf8_ranges::{Utf8Range, Utf8Sequences};
@@ -34,7 +36,7 @@ impl fmt::Display for LevenshteinError {
}
}
-impl std::error::Error for LevenshteinError {}
+impl core::error::Error for LevenshteinError {}
/// A Unicode aware Levenshtein automaton for running efficient fuzzy queries.
///
@@ -92,11 +94,13 @@ impl std::error::Error for LevenshteinError {}
///
/// This is important functionality, so one should count on this implementation
/// being vastly improved in the future.
+#[cfg(feature = "alloc")]
pub struct Levenshtein {
prog: DynamicLevenshtein,
dfa: Dfa,
}
+#[cfg(feature = "alloc")]
impl Levenshtein {
/// Create a new Levenshtein query.
///
@@ -109,6 +113,7 @@ impl Levenshtein {
///
/// A `Levenshtein` value satisfies the `Automaton` trait, which means it
/// can be used with the `search` method of any finite state transducer.
+ #[cfg(feature = "alloc")]
pub fn new(
query: &str,
distance: u32,
@@ -132,6 +137,7 @@ impl Levenshtein {
///
/// A `Levenshtein` value satisfies the `Automaton` trait, which means it
/// can be used with the `search` method of any finite state transducer.
+ #[cfg(feature = "alloc")]
pub fn new_with_limit(
query: &str,
distance: u32,
@@ -147,6 +153,7 @@ impl Levenshtein {
}
}
+#[cfg(feature = "alloc")]
impl fmt::Debug for Levenshtein {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(
@@ -158,11 +165,13 @@ impl fmt::Debug for Levenshtein {
}
#[derive(Clone)]
+#[cfg(feature = "alloc")]
struct DynamicLevenshtein {
query: String,
dist: usize,
}
+#[cfg(feature = "alloc")]
impl DynamicLevenshtein {
fn start(&self) -> Vec {
(0..self.query.chars().count() + 1).collect()
@@ -190,6 +199,7 @@ impl DynamicLevenshtein {
}
}
+#[cfg(feature = "alloc")]
impl Automaton for Levenshtein {
type State = Option;
@@ -215,6 +225,7 @@ impl Automaton for Levenshtein {
}
#[derive(Debug)]
+#[cfg(feature = "alloc")]
struct Dfa {
states: Vec,
}
@@ -237,12 +248,14 @@ impl fmt::Debug for State {
}
}
+#[cfg(feature = "alloc")]
struct DfaBuilder {
dfa: Dfa,
lev: DynamicLevenshtein,
cache: HashMap, usize>,
}
+#[cfg(feature = "alloc")]
impl DfaBuilder {
fn new(lev: DynamicLevenshtein) -> DfaBuilder {
DfaBuilder {
diff --git a/src/bytes.rs b/src/bytes.rs
index 35da4112..943feaa8 100644
--- a/src/bytes.rs
+++ b/src/bytes.rs
@@ -1,4 +1,5 @@
-use std::convert::TryInto;
+use core::convert::TryInto;
+#[cfg(feature = "std")]
use std::io;
/// Read a u32 in little endian format from the beginning of the given slice.
@@ -30,6 +31,7 @@ pub fn write_u32_le(n: u32, slice: &mut [u8]) {
/// Like write_u32_le, but to an io::Write implementation. If every byte could
/// not be writen, then this returns an error.
#[inline]
+#[cfg(feature = "std")]
pub fn io_write_u32_le(n: u32, mut wtr: W) -> io::Result<()> {
let mut buf = [0; 4];
write_u32_le(n, &mut buf);
@@ -55,6 +57,7 @@ pub fn write_u64_le(n: u64, slice: &mut [u8]) {
/// Like write_u64_le, but to an io::Write implementation. If every byte could
/// not be writen, then this returns an error.
#[inline]
+#[cfg(feature = "std")]
pub fn io_write_u64_le(n: u64, mut wtr: W) -> io::Result<()> {
let mut buf = [0; 8];
write_u64_le(n, &mut buf);
@@ -65,6 +68,7 @@ pub fn io_write_u64_le(n: u64, mut wtr: W) -> io::Result<()> {
/// and writes it to the given writer. The number of bytes written is returned
/// on success.
#[inline]
+#[cfg(feature = "std")]
pub fn pack_uint(wtr: W, n: u64) -> io::Result {
let nbytes = pack_size(n);
pack_uint_in(wtr, n, nbytes).map(|_| nbytes)
@@ -76,6 +80,7 @@ pub fn pack_uint(wtr: W, n: u64) -> io::Result {
/// `nbytes` must be >= pack_size(n) and <= 8, where `pack_size(n)` is the
/// smallest number of bytes that can store the integer given.
#[inline]
+#[cfg(feature = "std")]
pub fn pack_uint_in(
mut wtr: W,
mut n: u64,
diff --git a/src/error.rs b/src/error.rs
index 7f2bd347..603d331e 100644
--- a/src/error.rs
+++ b/src/error.rs
@@ -1,11 +1,17 @@
-use std::fmt;
+use core::fmt;
+#[cfg(feature = "std")]
use std::io;
use crate::raw;
/// A `Result` type alias for this crate's `Error` type.
+#[cfg(feature = "std")]
pub type Result<T> = std::result::Result<T, Error>;
+/// A `Result` type alias for this crate's `Error` type.
+#[cfg(not(feature = "std"))]
+pub type Result<T> = core::result::Result<T, Error>;
+
/// An error that encapsulates all possible errors in this crate.
#[derive(Debug)]
pub enum Error {
@@ -13,9 +19,11 @@ pub enum Error {
/// transducer.
Fst(raw::Error),
/// An IO error that occurred while writing a finite state transducer.
+ #[cfg(feature = "std")]
Io(io::Error),
}
+#[cfg(feature = "std")]
impl From for Error {
#[inline]
fn from(err: io::Error) -> Error {
@@ -34,15 +42,29 @@ impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match *self {
Error::Fst(_) => write!(f, "FST error"),
+ #[cfg(feature = "std")]
Error::Io(_) => write!(f, "I/O error"),
}
}
}
+#[cfg(feature = "std")]
impl std::error::Error for Error {
fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
match *self {
Error::Fst(ref err) => Some(err),
+ #[cfg(feature = "std")]
+ Error::Io(ref err) => Some(err),
+ }
+ }
+}
+
+#[cfg(not(feature = "std"))]
+impl core::error::Error for Error {
+ fn source(&self) -> Option<&(dyn core::error::Error + 'static)> {
+ match *self {
+ Error::Fst(ref err) => Some(err),
+ #[cfg(feature = "std")]
Error::Io(ref err) => Some(err),
}
}
diff --git a/src/lib.rs b/src/lib.rs
index 6bf974e4..684f4670 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -299,15 +299,24 @@ data structures found in the standard library, such as `BTreeSet` and
`fst-bin/src/merge.rs` from the root of this crate's repository.
*/
+#![cfg_attr(not(feature = "std"), no_std)]
#![deny(missing_docs)]
+#![cfg_attr(not(feature = "std"), feature(error_in_core))]
+
+#[cfg(feature = "alloc")]
+extern crate alloc;
#[cfg(all(feature = "levenshtein", doctest))]
doc_comment::doctest!("../README.md");
pub use crate::automaton::Automaton;
pub use crate::error::{Error, Result};
-pub use crate::map::{Map, MapBuilder};
-pub use crate::set::{Set, SetBuilder};
+pub use crate::map::Map;
+#[cfg(feature = "alloc")]
+pub use crate::map::MapBuilder;
+pub use crate::set::Set;
+#[cfg(feature = "alloc")]
+pub use crate::set::SetBuilder;
pub use crate::stream::{IntoStreamer, Streamer};
mod bytes;
diff --git a/src/map.rs b/src/map.rs
index d2121e0e..be735ba8 100644
--- a/src/map.rs
+++ b/src/map.rs
@@ -1,12 +1,20 @@
-use std::fmt;
+#[cfg(feature = "alloc")]
+use core::fmt;
+#[cfg(feature = "std")]
use std::io;
-use std::iter::{self, FromIterator};
+#[cfg(feature = "alloc")]
+use core::iter::{self, FromIterator};
+#[cfg(feature = "alloc")]
use crate::automaton::{AlwaysMatch, Automaton};
use crate::raw;
pub use crate::raw::IndexedValue;
-use crate::stream::{IntoStreamer, Streamer};
+#[cfg(feature = "alloc")]
+use crate::stream::IntoStreamer;
+use crate::stream::Streamer;
use crate::Result;
+#[cfg(feature = "alloc")]
+use alloc::{vec::Vec, string::String};
/// Map is a lexicographically ordered map from byte strings to integers.
///
@@ -54,6 +62,7 @@ use crate::Result;
#[derive(Clone)]
pub struct Map(raw::Fst);
+#[cfg(feature = "alloc")]
impl Map> {
/// Create a `Map` from an iterator of lexicographically ordered byte
/// strings and associated values.
@@ -64,6 +73,7 @@ impl Map> {
/// Note that this is a convenience function to build a map in memory.
/// To build a map that streams to an arbitrary `io::Write`, use
/// `MapBuilder`.
+ #[cfg(feature = "std")]
pub fn from_iter(iter: I) -> Result