initial commit - enclose, joinstem, bysize

and tests
This commit is contained in:
Llywelwyn 2023-09-09 02:07:10 +01:00
parent c65c7ec716
commit 51a4925c14
5 changed files with 94 additions and 2 deletions

16
Cargo.toml Normal file
View file

@ -0,0 +1,16 @@
[package]
name = "inflect"
version = "0.1.0"
authors = ["Lewis Wynne <lewis@llyw.co.uk>"]
edition = "2021"
publish = false
description = "inflect is a Rust port of the Python inflect library, used to generate plurals, ordinals, indefinite articles, and to convert numbers to words."
homepage = "https://github.com/llywelwyn/inflect_rs"
repository = "https://github.com/llywelwyn/inflect_rs"
readme = "README.md"
keywords = ["inflect", "plural", "ordinal", "pluralize", "formatting"]
categories = ["text-processing", "value-formatting"]
license = "MIT"
[dependencies]
regex = {version = "1.9.5"}

View file

@ -1,7 +1,6 @@
The MIT License (MIT) The MIT License (MIT)
Copyright (c) 2023 Llywelwyn Copyright (c) 2023 Lewis Wynne
Permission is hereby granted, free of charge, to any person obtaining a copy Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal of this software and associated documentation files (the "Software"), to deal

34
src/inflect.rs Normal file
View file

@ -0,0 +1,34 @@
use std::collections::{ HashMap, HashSet };
/// Wraps the pattern `s` in a regex non-capturing group: `(?:` + `s` + `)`.
///
/// `s` is inserted verbatim; no escaping is applied to it.
pub fn enclose(s: &str) -> String {
    // "(?:" and ")" add four bytes around the input.
    let mut group = String::with_capacity(s.len() + 4);
    group.push_str("(?:");
    group.push_str(s);
    group.push(')');
    group
}
/// Joins the stem of each word in `words` into a regex alternation wrapped in a
/// non-capturing group, e.g. `(?:ephemer|ir|.*it)`.
///
/// * `cutpoint` - how much of each word to keep, counted in characters:
///   * `None` keeps every word whole;
///   * `Some(n)` with `n >= 0` keeps the first `n` characters;
///   * `Some(n)` with `n < 0` drops the last `|n|` characters.
/// * `words` - the words to join; `None` is treated as an empty list.
///
/// Cutpoints that reach past either end of a word are clamped rather than
/// panicking (mirroring slice semantics of the Python original, `w[:cutpoint]`),
/// so a word shorter than the cut simply contributes an empty or whole stem.
/// Cuts are made on character boundaries, so multi-byte UTF-8 input is safe.
pub fn joinstem(cutpoint: Option<i32>, words: Option<Vec<&str>>) -> String {
    let words = words.unwrap_or_default();
    let stem = words
        .iter()
        .map(|&word| match cutpoint {
            None => word,
            Some(c) => {
                // `unsigned_abs` avoids the overflow that `-c` hits for `i32::MIN`;
                // u32 -> usize is a widening cast on 32- and 64-bit targets.
                let magnitude = c.unsigned_abs() as usize;
                // Number of leading characters to keep.
                let keep = if c < 0 {
                    word.chars().count().saturating_sub(magnitude)
                } else {
                    magnitude
                };
                // Byte offset of the first character to drop; if `keep` covers
                // the whole word there is no such character and nothing is cut.
                let end = word
                    .char_indices()
                    .nth(keep)
                    .map_or(word.len(), |(idx, _)| idx);
                &word[..end]
            }
        })
        .collect::<Vec<&str>>()
        .join("|");
    enclose(&stem)
}
/// Groups `words` into sets keyed by word length.
///
/// The key is `str::len`, i.e. the length in bytes of the UTF-8 encoding.
/// Duplicate words collapse into a single set entry. An empty input yields an
/// empty map.
pub fn bysize(words: Vec<&str>) -> HashMap<usize, HashSet<String>> {
    words.into_iter().fold(HashMap::new(), |mut groups, w| {
        groups.entry(w.len()).or_default().insert(w.to_owned());
        groups
    })
}

6
src/lib.rs Normal file
View file

@ -0,0 +1,6 @@
//! inflect_rs is a Rust port of the Python inflect library.
//!
//! It is used to generate plurals, ordinals, indefinite articles, and to convert numbers to words.

mod inflect;

// Re-export everything from the private `inflect` module at the crate root.
pub use crate::inflect::*;

37
tests/inflect_tests.rs Normal file
View file

@ -0,0 +1,37 @@
use inflect::*;
/// `enclose` wraps its argument in a non-capturing regex group.
#[test]
fn test_enclose() {
    let wrapped = enclose("foo");
    assert_eq!(wrapped, "(?:foo)");
}
/// Exercises `joinstem` with a negative cutpoint, without a cutpoint, and without words.
#[test]
fn test_joinstem() {
    // Negative cutpoint: the trailing "is" is stripped from every word.
    let trimmed = joinstem(Some(-2), Some(vec!["ephemeris", "iris", ".*itis"]));
    assert_eq!(trimmed, "(?:ephemer|ir|.*it)");

    // No cutpoint: words are kept whole.
    let untouched = joinstem(None, Some(vec!["ephemeris"]));
    assert_eq!(untouched, "(?:ephemeris)");

    // No words: the group is empty whether or not a cutpoint is given.
    for cutpoint in [Some(5), None] {
        assert_eq!(joinstem(cutpoint, None), "(?:)");
    }
}
/// Verifies that `bysize` buckets words by length.
///
/// Every expected bucket must be present: a missing bucket fails the test
/// instead of being skipped.
#[test]
fn test_bysize() {
    let words = vec!["ant", "cat", "dog", "pig", "frog", "goat", "horse", "elephant"];
    let result = bysize(words);

    // Returns the bucket for `len` in sorted order (set iteration order is
    // unspecified), panicking if the bucket does not exist.
    let sorted_bucket = |len: usize| -> Vec<&str> {
        let set = result
            .get(&len)
            .unwrap_or_else(|| panic!("no bucket for length {}", len));
        let mut bucket: Vec<&str> = set.iter().map(String::as_str).collect();
        bucket.sort_unstable();
        bucket
    };

    // One bucket per distinct word length: 3, 4, 5 and 8.
    assert_eq!(result.len(), 4);
    assert_eq!(sorted_bucket(3), vec!["ant", "cat", "dog", "pig"]);
    assert_eq!(sorted_bucket(4), vec!["frog", "goat"]);
    assert_eq!(sorted_bucket(5), vec!["horse"]);
    assert_eq!(sorted_bucket(8), vec!["elephant"]);
}