initial commit - enclose, joinstem, bysize
and tests
This commit is contained in:
parent
c65c7ec716
commit
51a4925c14
5 changed files with 94 additions and 2 deletions
16
Cargo.toml
Normal file
16
Cargo.toml
Normal file
|
|
@ -0,0 +1,16 @@
|
||||||
|
[package]
|
||||||
|
name = "inflect"
|
||||||
|
version = "0.1.0"
|
||||||
|
authors = ["Lewis Wynne <lewis@llyw.co.uk>"]
|
||||||
|
edition = "2021"
|
||||||
|
publish = false
|
||||||
|
description = "inflect is a Rust port of the Python inflect library, used to generate plurals, ordinals, indefinite articles, and to convert numbers to words."
|
||||||
|
homepage = "https://github.com/llywelwyn/inflect_rs"
|
||||||
|
repository = "https://github.com/llywelwyn/inflect_rs"
|
||||||
|
readme = "README.md"
|
||||||
|
keywords = ["inflect", "plural", "ordinal", "pluralize", "formatting"]
|
||||||
|
categories = ["text-processing", "value-formatting"]
|
||||||
|
license = "MIT"
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
regex = {version = "1.9.5"}
|
||||||
|
|
@ -1,7 +1,6 @@
|
||||||
|
|
||||||
The MIT License (MIT)
|
The MIT License (MIT)
|
||||||
|
|
||||||
Copyright (c) 2023 Llywelwyn
|
Copyright (c) 2023 Lewis Wynne
|
||||||
|
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
of this software and associated documentation files (the "Software"), to deal
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
|
|
||||||
34
src/inflect.rs
Normal file
34
src/inflect.rs
Normal file
|
|
@ -0,0 +1,34 @@
|
||||||
|
use std::collections::{ HashMap, HashSet };
|
||||||
|
|
||||||
|
/// Wraps the pattern fragment `s` in a regex non-capturing group.
///
/// The result is the literal text `(?:`, followed by `s` unchanged, followed
/// by `)`. No escaping is applied to `s`.
pub fn enclose(s: &str) -> String {
    // "(?:" and ")" add four bytes around the input.
    let mut group = String::with_capacity(s.len() + 4);
    group.push_str("(?:");
    group.push_str(s);
    group.push(')');
    group
}
|
||||||
|
|
||||||
|
/// Joins the stem of each word in 'words' into a string for Regex.
|
||||||
|
pub fn joinstem(cutpoint: Option<i32>, words: Option<Vec<&str>>) -> String {
|
||||||
|
let words = words.unwrap_or_else(|| Vec::new());
|
||||||
|
let stem = words
|
||||||
|
.iter()
|
||||||
|
.map(|w| {
|
||||||
|
if let Some(c) = cutpoint {
|
||||||
|
if c < 0 { &w[..w.len() - (-c as usize)] } else { &w[..c as usize] }
|
||||||
|
} else {
|
||||||
|
w
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.collect::<Vec<&str>>()
|
||||||
|
.join("|");
|
||||||
|
enclose(&stem)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Groups `words` into sets keyed by each word's length.
///
/// The key is the value of `str::len` for the word (its length in bytes), and
/// each set holds owned copies of the words with that length. Duplicate words
/// collapse into a single set entry. An empty input yields an empty map.
pub fn bysize(words: Vec<&str>) -> HashMap<usize, HashSet<String>> {
    words.into_iter().fold(
        HashMap::<usize, HashSet<String>>::new(),
        |mut buckets, w| {
            buckets.entry(w.len()).or_default().insert(w.to_owned());
            buckets
        },
    )
}
|
||||||
6
src/lib.rs
Normal file
6
src/lib.rs
Normal file
|
|
@ -0,0 +1,6 @@
|
||||||
|
mod inflect;
|
||||||
|
|
||||||
|
/// inflect_rs is a Rust port of the Python inflect library.
|
||||||
|
/// It is used to generate plurals, ordinals, indefinite articles, and to convert numbers to words.
|
||||||
|
|
||||||
|
pub use crate::inflect::*;
|
||||||
37
tests/inflect_tests.rs
Normal file
37
tests/inflect_tests.rs
Normal file
|
|
@ -0,0 +1,37 @@
|
||||||
|
use inflect::*;
|
||||||
|
|
||||||
|
/// `enclose` wraps its input in a non-capturing group.
#[test]
fn test_enclose() {
    let wrapped = enclose("foo");
    assert_eq!(wrapped, "(?:foo)");
}
|
||||||
|
|
||||||
|
/// `joinstem` truncates each word at the cutpoint, joins the stems with `|`
/// and encloses the result; missing words produce an empty group.
#[test]
fn test_joinstem() {
    // (cutpoint, words, expected pattern)
    let cases: Vec<(Option<i32>, Option<Vec<&str>>, &str)> = vec![
        (
            Some(-2),
            Some(vec!["ephemeris", "iris", ".*itis"]),
            "(?:ephemer|ir|.*it)",
        ),
        (None, Some(vec!["ephemeris"]), "(?:ephemeris)"),
        (Some(5), None, "(?:)"),
        (None, None, "(?:)"),
    ];

    for (cutpoint, words, expected) in cases {
        assert_eq!(joinstem(cutpoint, words), expected);
    }
}
|
||||||
|
|
||||||
|
/// `bysize` buckets words by length; every expected bucket must exist and
/// hold exactly the expected words.
#[test]
fn test_bysize() {
    let words = vec!["ant", "cat", "dog", "pig", "frog", "goat", "horse", "elephant"];
    let result = bysize(words);

    // (word length, words expected in that bucket, in sorted order)
    let expected: Vec<(usize, Vec<&str>)> = vec![
        (3, vec!["ant", "cat", "dog", "pig"]),
        (4, vec!["frog", "goat"]),
        (5, vec!["horse"]),
        (8, vec!["elephant"]),
    ];

    assert_eq!(
        result.len(),
        expected.len(),
        "unexpected number of length buckets"
    );

    for (len, want) in expected {
        // A missing bucket is a failure, not something to skip over.
        let set = result
            .get(&len)
            .unwrap_or_else(|| panic!("missing bucket for length {}", len));
        // HashSet iteration order is unspecified, so sort before comparing.
        let mut got: Vec<&String> = set.iter().collect();
        got.sort();
        assert_eq!(got, want, "wrong words in bucket for length {}", len);
    }
}
|
||||||
Loading…
Add table
Add a link
Reference in a new issue