initial commit - enclose, joinstem, bysize

and tests
This commit is contained in:
Llywelwyn 2023-09-09 02:07:10 +01:00
parent c65c7ec716
commit 51a4925c14
5 changed files with 94 additions and 2 deletions

34
src/inflect.rs Normal file
View file

@ -0,0 +1,34 @@
use std::collections::{ HashMap, HashSet };
/// Wraps the regex fragment `s` in a non-capturing group.
///
/// The fragment is inserted verbatim (no escaping), so `enclose("a|b")`
/// yields `(?:a|b)`.
pub fn enclose(s: &str) -> String {
    ["(?:", s, ")"].concat()
}
/// Joins the stem of each word in `words` into a single non-capturing
/// alternation group for use in a regex, e.g. `(?:ephemer|ir|clitor)`.
///
/// * `cutpoint` - how each word is trimmed to its stem, counted in
///   characters and clamped to the word (like a Python `w[:cutpoint]` slice):
///   * `None` keeps the whole word,
///   * `Some(n)` with `n >= 0` keeps the first `n` characters,
///   * `Some(n)` with `n < 0` drops the last `|n|` characters.
/// * `words` - the words to join; `None` is treated as an empty list.
///
/// Stems are joined with `|` and passed to [`enclose`]. Words are inserted
/// verbatim; no regex escaping is applied. An out-of-range cutpoint never
/// panics: it yields the whole word (positive) or an empty stem (negative).
pub fn joinstem(cutpoint: Option<i32>, words: Option<Vec<&str>>) -> String {
    let words = words.unwrap_or_default();
    let stem = words
        .iter()
        .map(|&w| stem_of(w, cutpoint))
        .collect::<Vec<&str>>()
        .join("|");
    enclose(&stem)
}

/// Returns the prefix of `word` selected by `cutpoint`, cutting only on
/// character boundaries and clamping out-of-range cutpoints.
fn stem_of(word: &str, cutpoint: Option<i32>) -> &str {
    let cut = match cutpoint {
        Some(c) => c,
        None => return word,
    };
    // `unsigned_abs` avoids the overflow that `-c` hits for `i32::MIN`;
    // widening u32 -> usize is lossless on 32- and 64-bit targets.
    let magnitude = cut.unsigned_abs() as usize;
    if cut < 0 {
        // The `magnitude`-th character from the end starts the dropped tail.
        // `magnitude >= 1` here, so the subtraction cannot underflow.
        match word.char_indices().rev().nth(magnitude - 1) {
            Some((idx, _)) => &word[..idx],
            // Asked to drop more characters than the word has.
            None => "",
        }
    } else {
        // The byte offset of character number `magnitude` ends the prefix.
        match word.char_indices().nth(magnitude) {
            Some((idx, _)) => &word[..idx],
            // Word is no longer than the cutpoint: keep all of it.
            None => word,
        }
    }
}
/// Groups `words` into sets keyed by word length.
///
/// The length used as the key is `str::len`, i.e. the UTF-8 byte length of
/// the word. Duplicate words collapse into a single set entry, and an empty
/// input produces an empty map.
pub fn bysize(words: Vec<&str>) -> HashMap<usize, HashSet<String>> {
    words.into_iter().fold(HashMap::new(), |mut buckets, w| {
        buckets
            .entry(w.len())
            .or_insert_with(HashSet::new)
            .insert(w.to_owned());
        buckets
    })
}

6
src/lib.rs Normal file
View file

@ -0,0 +1,6 @@
//! inflect_rs is a Rust port of the Python inflect library.
//! It is used to generate plurals, ordinals, indefinite articles, and to convert numbers to words.

// Private implementation module; its public items are re-exported below.
mod inflect;
// Flatten the module so callers can use its items directly from the crate root.
pub use crate::inflect::*;