initial commit - enclose, joinstem, bysize
and tests
This commit is contained in:
parent
c65c7ec716
commit
51a4925c14
5 changed files with 94 additions and 2 deletions
34
src/inflect.rs
Normal file
34
src/inflect.rs
Normal file
|
|
@ -0,0 +1,34 @@
|
|||
use std::collections::{ HashMap, HashSet };
|
||||
|
||||
/// Wraps the pattern `s` in a regex non-capturing group.
///
/// The result is the literal text `(?:`, followed by `s` unchanged,
/// followed by `)`. `s` is not escaped or validated in any way.
pub fn enclose(s: &str) -> String {
    ["(?:", s, ")"].concat()
}
|
||||
|
||||
/// Joins the stem of each word in 'words' into a string for Regex.
|
||||
pub fn joinstem(cutpoint: Option<i32>, words: Option<Vec<&str>>) -> String {
|
||||
let words = words.unwrap_or_else(|| Vec::new());
|
||||
let stem = words
|
||||
.iter()
|
||||
.map(|w| {
|
||||
if let Some(c) = cutpoint {
|
||||
if c < 0 { &w[..w.len() - (-c as usize)] } else { &w[..c as usize] }
|
||||
} else {
|
||||
w
|
||||
}
|
||||
})
|
||||
.collect::<Vec<&str>>()
|
||||
.join("|");
|
||||
enclose(&stem)
|
||||
}
|
||||
|
||||
/// Groups `words` by length.
///
/// Returns a map from a length to the set of words having that length. The
/// length used as the key is `str::len`, i.e. the word's size in bytes.
/// Duplicate words collapse into a single set entry; an empty input yields an
/// empty map.
pub fn bysize(words: Vec<&str>) -> HashMap<usize, HashSet<String>> {
    let empty: HashMap<usize, HashSet<String>> = HashMap::new();
    words.into_iter().fold(empty, |mut buckets, word| {
        buckets
            .entry(word.len())
            .or_default()
            .insert(word.to_owned());
        buckets
    })
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue