diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..5d29654
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,16 @@
+[package]
+name = "inflect"
+version = "0.1.0"
+authors = ["Lewis Wynne"]
+edition = "2021"
+publish = false
+description = "inflect is a Rust port of the Python inflect library, used to generate plurals, ordinals, indefinite articles, and to convert numbers to words."
+homepage = "https://github.com/llywelwyn/inflect_rs"
+repository = "https://github.com/llywelwyn/inflect_rs"
+readme = "README.md"
+keywords = ["inflect", "plural", "ordinal", "pluralize", "formatting"]
+categories = ["text-processing", "value-formatting"]
+license = "MIT"
+
+[dependencies]
+regex = {version = "1.9.5"}
\ No newline at end of file
diff --git a/LICENSE.md b/LICENSE.md
index e5d79c8..68be029 100644
--- a/LICENSE.md
+++ b/LICENSE.md
@@ -1,7 +1,6 @@
-
 The MIT License (MIT)
 
-Copyright (c) 2023 Llywelwyn
+Copyright (c) 2023 Lewis Wynne
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
diff --git a/src/inflect.rs b/src/inflect.rs
new file mode 100644
index 0000000..b81d054
--- /dev/null
+++ b/src/inflect.rs
@@ -0,0 +1,48 @@
+use std::collections::{ HashMap, HashSet };
+
+/// Wraps the pattern `s` in a regex non-capturing group, i.e. `(?:s)`.
+pub fn enclose(s: &str) -> String {
+    format!("(?:{})", s)
+}
+
+/// Builds a regex alternation from the stems of `words`.
+///
+/// With `cutpoint` of `None` every word is used whole. A non-negative
+/// `cutpoint` keeps that many leading bytes of each word; a negative one
+/// drops that many trailing bytes. Cuts that overshoot a word are clamped,
+/// as Python's `w[:cutpoint]` does, instead of panicking. `words` of `None`
+/// is treated as an empty list, which yields `(?:)`.
+// NOTE(review): the integer type of `cutpoint` is assumed to be `i32` - confirm.
+pub fn joinstem(cutpoint: Option<i32>, words: Option<Vec<&str>>) -> String {
+    let stems: Vec<&str> = words
+        .unwrap_or_default()
+        .into_iter()
+        .map(|word| cutpoint.map_or(word, |cut| stem_of(word, cut)))
+        .collect();
+    enclose(&stems.join("|"))
+}
+
+/// Returns the prefix of `word` selected by `cut`, clamped to the word.
+fn stem_of(word: &str, cut: i32) -> &str {
+    // `unsigned_abs` avoids the overflow that negating `i32::MIN` would cause.
+    let magnitude = cut.unsigned_abs() as usize;
+    let mut end = if cut < 0 {
+        word.len().saturating_sub(magnitude)
+    } else {
+        word.len().min(magnitude)
+    };
+    // Back up to a char boundary so slicing cannot panic on multi-byte text.
+    while !word.is_char_boundary(end) {
+        end -= 1;
+    }
+    &word[..end]
+}
+
+/// Groups `words` into sets keyed by their length in bytes (`str::len`).
+pub fn bysize(words: Vec<&str>) -> HashMap<usize, HashSet<String>> {
+    let mut res: HashMap<usize, HashSet<String>> = HashMap::new();
+    for word in words {
+        res.entry(word.len()).or_default().insert(word.to_string());
+    }
+    res
+}
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..b74f831
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,6 @@
+//! inflect_rs is a Rust port of the Python inflect library.
+//! It is used to generate plurals, ordinals, indefinite articles, and to convert numbers to words.
+
+mod inflect;
+
+pub use crate::inflect::*;
diff --git a/tests/inflect_tests.rs b/tests/inflect_tests.rs
new file mode 100644
index 0000000..715a08c
--- /dev/null
+++ b/tests/inflect_tests.rs
@@ -0,0 +1,44 @@
+use inflect::*;
+
+#[test]
+fn test_enclose() {
+    assert_eq!(enclose("foo"), "(?:foo)");
+}
+
+#[test]
+fn test_joinstem() {
+    assert_eq!(
+        joinstem(Some(-2), Some(vec!["ephemeris", "iris", ".*itis"])),
+        "(?:ephemer|ir|.*it)"
+    );
+    assert_eq!(joinstem(None, Some(vec!["ephemeris"])), "(?:ephemeris)");
+    assert_eq!(joinstem(Some(5), None), "(?:)");
+    assert_eq!(joinstem(None, None), "(?:)");
+}
+
+#[test]
+fn test_joinstem_clamps_cutpoint() {
+    assert_eq!(joinstem(Some(-5), Some(vec!["ab", "abcdefg"])), "(?:|ab)");
+    assert_eq!(joinstem(Some(10), Some(vec!["ab"])), "(?:ab)");
+}
+
+#[test]
+fn test_bysize() {
+    let words = vec!["ant", "cat", "dog", "pig", "frog", "goat", "horse", "elephant"];
+    let result = bysize(words);
+    // Missing buckets come back empty, so an absent key fails the assertion
+    // instead of being skipped.
+    let bucket = |len: usize| -> Vec<&str> {
+        let mut found: Vec<&str> = result
+            .get(&len)
+            .map(|set| set.iter().map(String::as_str).collect())
+            .unwrap_or_default();
+        found.sort_unstable();
+        found
+    };
+    assert_eq!(result.len(), 4);
+    assert_eq!(bucket(3), vec!["ant", "cat", "dog", "pig"]);
+    assert_eq!(bucket(4), vec!["frog", "goat"]);
+    assert_eq!(bucket(5), vec!["horse"]);
+    assert_eq!(bucket(8), vec!["elephant"]);
+}