test_word()

This commit is contained in:
Lewis Wynne 2025-03-11 13:55:17 +00:00
parent 04718f77b2
commit 4da4f86119
2 changed files with 386 additions and 369 deletions

View file

@ -1,5 +1,5 @@
use std::collections::{ HashMap, HashSet };
use regex::Regex; use regex::Regex;
use std::collections::{HashMap, HashSet};
/// Encloses a string 's' in a non-capturing group. /// Encloses a string 's' in a non-capturing group.
pub fn enclose(s: &str) -> String { pub fn enclose(s: &str) -> String {
@ -13,7 +13,11 @@ pub fn joinstem(cutpoint: Option<i32>, words: Option<Vec<String>>) -> String {
.iter() .iter()
.map(|w| { .map(|w| {
if let Some(c) = cutpoint { if let Some(c) = cutpoint {
if c < 0 { &w[..w.len() - (-c as usize)] } else { &w[..c as usize] } if c < 0 {
&w[..w.len() - (-c as usize)]
} else {
&w[..c as usize]
}
} else { } else {
w w
} }
@ -38,8 +42,13 @@ pub fn make_pl_si_lists(
list: Vec<String>, list: Vec<String>,
pl_ending: &str, pl_ending: &str,
si_ending_size: Option<i32>, si_ending_size: Option<i32>,
do_joinstem: bool do_joinstem: bool,
) -> (Vec<String>, HashMap<usize, HashSet<String>>, HashMap<usize, HashSet<String>>, String) { ) -> (
Vec<String>,
HashMap<usize, HashSet<String>>,
HashMap<usize, HashSet<String>>,
String,
) {
let si_ending_size = si_ending_size.map(|size| -size); let si_ending_size = si_ending_size.map(|size| -size);
let si_list: Vec<String> = list let si_list: Vec<String> = list
.iter() .iter()
@ -70,7 +79,7 @@ fn pl_sb_irregular_s() -> HashMap<String, String> {
("penis", "penises|penes"), ("penis", "penises|penes"),
("testis", "testes"), ("testis", "testes"),
("atlas", "atlases|atlantes"), ("atlas", "atlases|atlantes"),
("yes", "yeses") ("yes", "yeses"),
] ]
.iter() .iter()
.map(|(k, v)| (k.to_string(), v.to_string())) .map(|(k, v)| (k.to_string(), v.to_string()))
@ -111,7 +120,7 @@ fn pl_sb_irregular() -> HashMap<String, String> {
("mary", "maries"), ("mary", "maries"),
("talouse", "talouses"), ("talouse", "talouses"),
("rom", "roma"), ("rom", "roma"),
("carmen", "carmina") ("carmen", "carmina"),
] ]
.iter() .iter()
.map(|(k, v)| (k.to_string(), v.to_string())) .map(|(k, v)| (k.to_string(), v.to_string()))
@ -121,20 +130,25 @@ fn pl_sb_irregular() -> HashMap<String, String> {
} }
fn pl_sb_irregular_caps() -> HashMap<&'static str, &'static str> { fn pl_sb_irregular_caps() -> HashMap<&'static str, &'static str> {
return vec![("Romany", "Romanies"), ("Jerry", "Jerrys"), ("Mary", "Marys"), ("Rom", "Roma")] return vec![
("Romany", "Romanies"),
("Jerry", "Jerrys"),
("Mary", "Marys"),
("Rom", "Roma"),
]
.into_iter() .into_iter()
.collect(); .collect();
} }
fn pl_sb_irregular_compound() -> HashMap<&'static str, &'static str> { fn pl_sb_irregular_compound() -> HashMap<&'static str, &'static str> {
return vec![("prima donna", "prima donnas|prime donne")].into_iter().collect(); return vec![("prima donna", "prima donnas|prime donne")]
.into_iter()
.collect();
} }
fn si_sb_irregular() -> HashMap<String, String> { fn si_sb_irregular() -> HashMap<String, String> {
let mut si_sb_irregular: HashMap<String, String> = pl_sb_irregular() let mut si_sb_irregular: HashMap<String, String> =
.into_iter() pl_sb_irregular().into_iter().map(|(k, v)| (v, k)).collect();
.map(|(k, v)| (v, k))
.collect();
let mut keys_to_remove = Vec::new(); let mut keys_to_remove = Vec::new();
let keys: Vec<String> = si_sb_irregular.keys().cloned().collect(); let keys: Vec<String> = si_sb_irregular.keys().cloned().collect();
for k in keys.iter() { for k in keys.iter() {
@ -190,10 +204,7 @@ fn pl_sb_z_zes_bysize() -> HashMap<usize, HashSet<String>> {
} }
fn sb_ze_zes_list() -> Vec<String> { fn sb_ze_zes_list() -> Vec<String> {
return vec!["snooze"] return vec!["snooze"].iter().map(|s| s.to_string()).collect();
.iter()
.map(|s| s.to_string())
.collect();
} }
fn sb_ze_zes_bysize() -> HashMap<usize, HashSet<String>> { fn sb_ze_zes_bysize() -> HashMap<usize, HashSet<String>> {
@ -208,10 +219,7 @@ fn pl_sb_c_is_ides_complete() -> Vec<String> {
} }
fn pl_sb_c_is_ides_endings() -> Vec<String> { fn pl_sb_c_is_ides_endings() -> Vec<String> {
return vec!["itis"] return vec!["itis"].iter().map(|s| s.to_string()).collect();
.iter()
.map(|s| s.to_string())
.collect();
} }
fn pl_sb_c_is_ides() -> String { fn pl_sb_c_is_ides() -> String {
@ -221,7 +229,7 @@ fn pl_sb_c_is_ides() -> String {
.chain( .chain(
pl_sb_c_is_ides_endings() pl_sb_c_is_ides_endings()
.into_iter() .into_iter()
.map(|w| format!(".*{}", w)) .map(|w| format!(".*{}", w)),
) )
.collect(); .collect();
return joinstem(Some(-2), Some(pl_sb_c_is_ides)); return joinstem(Some(-2), Some(pl_sb_c_is_ides));
@ -270,7 +278,7 @@ fn pl_sb_c_a_ata_list() -> Vec<String> {
"stoma", "stoma",
"trauma", "trauma",
"gumma", "gumma",
"pragma" "pragma",
] ]
.iter() .iter()
.map(|s| s.to_string()) .map(|s| s.to_string())
@ -332,7 +340,7 @@ fn pl_sb_c_a_ae_list() -> Vec<String> {
"aurora", "aurora",
"umbra", "umbra",
"flora", "flora",
"fauna" "fauna",
] ]
.iter() .iter()
.map(|s| s.to_string()) .map(|s| s.to_string())
@ -388,7 +396,7 @@ fn pl_sb_u_um_a_list() -> Vec<String> {
"datum", "datum",
"ovum", "ovum",
"extremum", "extremum",
"candelabrum" "candelabrum",
] ]
.iter() .iter()
.map(|s| s.to_string()) .map(|s| s.to_string())
@ -442,7 +450,7 @@ fn pl_sb_c_um_a_list() -> Vec<String> {
"vacuum", "vacuum",
"velum", "velum",
"consortium", "consortium",
"arboretum" "arboretum",
] ]
.iter() .iter()
.map(|s| s.to_string()) .map(|s| s.to_string())
@ -475,7 +483,7 @@ fn pl_sb_u_us_i_list() -> Vec<String> {
"nucleus", "nucleus",
"stimulus", "stimulus",
"meniscus", "meniscus",
"sarcophagus" "sarcophagus",
] ]
.iter() .iter()
.map(|s| s.to_string()) .map(|s| s.to_string())
@ -513,7 +521,7 @@ fn pl_sb_c_us_i_list() -> Vec<String> {
"umbilicus", "umbilicus",
"uterus", "uterus",
"hippopotamus", "hippopotamus",
"cactus" "cactus",
] ]
.iter() .iter()
.map(|s| s.to_string()) .map(|s| s.to_string())
@ -537,7 +545,15 @@ fn pl_sb_c_us_i() -> String {
} }
fn pl_sb_c_us_us() -> Vec<String> { fn pl_sb_c_us_us() -> Vec<String> {
return vec!["status", "apparatus", "prospectus", "sinus", "hiatus", "impetus", "plexus"] return vec![
"status",
"apparatus",
"prospectus",
"sinus",
"hiatus",
"impetus",
"plexus",
]
.iter() .iter()
.map(|s| s.to_string()) .map(|s| s.to_string())
.collect(); .collect();
@ -557,7 +573,7 @@ fn pl_sb_u_on_a_list() -> Vec<String> {
"noumenon", "noumenon",
"organon", "organon",
"asyndeton", "asyndeton",
"hyperbaton" "hyperbaton",
] ]
.iter() .iter()
.map(|s| s.to_string()) .map(|s| s.to_string())
@ -581,10 +597,7 @@ fn pl_sb_u_on_a() -> String {
} }
fn pl_sb_c_on_a_list() -> Vec<String> { fn pl_sb_c_on_a_list() -> Vec<String> {
return vec!["oxymoron"] return vec!["oxymoron"].iter().map(|s| s.to_string()).collect();
.iter()
.map(|s| s.to_string())
.collect();
} }
fn si_sb_c_on_a_list() -> Vec<String> { fn si_sb_c_on_a_list() -> Vec<String> {
@ -604,7 +617,16 @@ fn pl_sb_c_on_a() -> String {
} }
fn pl_sb_c_o_i() -> Vec<String> { fn pl_sb_c_o_i() -> Vec<String> {
return vec!["solo", "soprano", "basso", "alto", "contralto", "tempo", "piano", "virtuoso"] return vec![
"solo",
"soprano",
"basso",
"alto",
"contralto",
"tempo",
"piano",
"virtuoso",
]
.iter() .iter()
.map(|s| s.to_string()) .map(|s| s.to_string())
.collect(); .collect();
@ -619,7 +641,7 @@ fn si_sb_c_o_i_bysize() -> HashMap<usize, HashSet<String>> {
pl_sb_c_o_i() pl_sb_c_o_i()
.iter() .iter()
.map(|w| format!("{}i", &w[..w.len() - 1])) .map(|w| format!("{}i", &w[..w.len() - 1]))
.collect() .collect(),
); );
} }
@ -837,7 +859,7 @@ fn pl_sb_u_o_os_endings() -> Vec<String> {
"Yamoussoukro", "Yamoussoukro",
"yo-yo", "yo-yo",
"zero", "zero",
"Zibo" "Zibo",
] ]
.iter() .iter()
.map(|s| s.to_string()) .map(|s| s.to_string())
@ -855,7 +877,7 @@ fn si_sb_u_o_os_bysize() -> HashMap<usize, HashSet<String>> {
pl_sb_u_o_os_endings() pl_sb_u_o_os_endings()
.iter() .iter()
.map(|w| format!("{}s", w)) .map(|w| format!("{}s", w))
.collect() .collect(),
); );
} }
@ -929,7 +951,9 @@ fn pl_sb_u_ix_ices() -> String {
} }
fn pl_sb_c_ex_ices_list() -> Vec<String> { fn pl_sb_c_ex_ices_list() -> Vec<String> {
return vec!["vortex", "vertex", "cortex", "latex", "pontifex", "apex", "index", "simplex"] return vec![
"vortex", "vertex", "cortex", "latex", "pontifex", "apex", "index", "simplex",
]
.iter() .iter()
.map(|s| s.to_string()) .map(|s| s.to_string())
.collect(); .collect();
@ -952,10 +976,7 @@ fn pl_sb_c_ex_ices() -> String {
} }
fn pl_sb_c_ix_ices_list() -> Vec<String> { fn pl_sb_c_ix_ices_list() -> Vec<String> {
return vec!["appendix"] return vec!["appendix"].iter().map(|s| s.to_string()).collect();
.iter()
.map(|s| s.to_string())
.collect();
} }
fn si_sb_c_ix_ices_list() -> Vec<String> { fn si_sb_c_ix_ices_list() -> Vec<String> {
@ -1022,20 +1043,8 @@ fn pl_sb_c_im() -> String {
fn pl_sb_u_man_mans_list() -> Vec<String> { fn pl_sb_u_man_mans_list() -> Vec<String> {
return vec![ return vec![
"ataman", "ataman", "caiman", "cayman", "ceriman", "desman", "dolman", "farman", "harman", "hetman",
"caiman", "human", "leman", "ottoman", "shaman", "talisman",
"cayman",
"ceriman",
"desman",
"dolman",
"farman",
"harman",
"hetman",
"human",
"leman",
"ottoman",
"shaman",
"talisman"
] ]
.iter() .iter()
.map(|s| s.to_string()) .map(|s| s.to_string())
@ -1060,7 +1069,7 @@ fn pl_sb_u_man_mans_caps_list() -> Vec<String> {
"Tacoman", "Tacoman",
"Yakiman", "Yakiman",
"Yokohaman", "Yokohaman",
"Yuman" "Yuman",
] ]
.iter() .iter()
.map(|s| s.to_string()) .map(|s| s.to_string())
@ -1146,7 +1155,7 @@ fn pl_sb_uninflected_s_complete() -> Vec<String> {
"innings", "innings",
"news", "news",
"mews", "mews",
"haggis" "haggis",
] ]
.iter() .iter()
.map(|s| s.to_string()) .map(|s| s.to_string())
@ -1165,7 +1174,7 @@ fn pl_sb_uninflected_s() -> Vec<String> {
pl_sb_uninflected_s.extend( pl_sb_uninflected_s.extend(
pl_sb_uninflected_s_endings() pl_sb_uninflected_s_endings()
.iter() .iter()
.map(|w| format!(".*{}", w)) .map(|w| format!(".*{}", w)),
); );
pl_sb_uninflected_s pl_sb_uninflected_s
} }
@ -1200,7 +1209,7 @@ fn pl_sb_uninflected_herd() -> Vec<String> {
"teal", "teal",
"turbot", "turbot",
"water fowl", "water fowl",
"water-fowl" "water-fowl",
] ]
.iter() .iter()
.map(|s| s.to_string()) .map(|s| s.to_string())
@ -1227,7 +1236,7 @@ fn pl_sb_uninflected_complete() -> Vec<String> {
"offspring", "offspring",
"pence", "pence",
"quid", "quid",
"hertz" "hertz",
] ]
.iter() .iter()
.map(|s| s.to_string()) .map(|s| s.to_string())
@ -1260,7 +1269,7 @@ fn pl_sb_uninflected_caps() -> Vec<String> {
"Shavese", "Shavese",
"Vermontese", "Vermontese",
"Wenchowese", "Wenchowese",
"Yengeese" "Yengeese",
] ]
.iter() .iter()
.map(|s| s.to_string()) .map(|s| s.to_string())
@ -1281,7 +1290,7 @@ fn pl_sb_uninflected_endings() -> Vec<String> {
"lese", "lese",
"mese", "mese",
"pox", "pox",
"craft" "craft",
] ]
.iter() .iter()
.map(|s| s.to_string()) .map(|s| s.to_string())
@ -1326,7 +1335,7 @@ fn pl_sb_singular_s_complete() -> Vec<String> {
"polis", "polis",
"rhinoceros", "rhinoceros",
"sassafras", "sassafras",
"trellis" "trellis",
] ]
.iter() .iter()
.map(|s| s.to_string()) .map(|s| s.to_string())
@ -1365,29 +1374,18 @@ fn si_sb_singular_s_bysize() -> HashMap<usize, HashSet<String>> {
} }
fn pl_sb_singular_s_es() -> Vec<String> { fn pl_sb_singular_s_es() -> Vec<String> {
return vec!["[A-Z].*es"] return vec!["[A-Z].*es"].iter().map(|s| s.to_string()).collect();
.iter()
.map(|s| s.to_string())
.collect();
} }
fn pl_sb_singular_s() -> String { fn pl_sb_singular_s() -> String {
let mut concat: Vec<String> = Vec::new(); let mut concat: Vec<String> = Vec::new();
concat.extend( concat.extend(pl_sb_singular_s_complete().iter().map(|w| w.to_string()));
pl_sb_singular_s_complete()
.iter()
.map(|w| w.to_string())
);
concat.extend( concat.extend(
pl_sb_singular_s_endings() pl_sb_singular_s_endings()
.iter() .iter()
.map(|w| format!(".*{}", w)) .map(|w| format!(".*{}", w)),
);
concat.extend(
pl_sb_singular_s_es()
.iter()
.map(|w| w.to_string())
); );
concat.extend(pl_sb_singular_s_es().iter().map(|w| w.to_string()));
return enclose(&concat.join("|")); return enclose(&concat.join("|"));
} }
@ -1435,7 +1433,7 @@ fn si_sb_use_uses() -> Vec<String> {
"spouses", "spouses",
"suffuses", "suffuses",
"transfuses", "transfuses",
"uses" "uses",
] ]
.iter() .iter()
.map(|s| s.to_string()) .map(|s| s.to_string())
@ -1599,7 +1597,7 @@ fn si_sb_ies_ie_case() -> Vec<String> {
"Willies", "Willies",
"Winnies", "Winnies",
"Wylies", "Wylies",
"Yorkies" "Yorkies",
] ]
.iter() .iter()
.map(|s| s.to_string()) .map(|s| s.to_string())
@ -1678,7 +1676,7 @@ fn si_sb_ies_ie() -> Vec<String> {
"veggies", "veggies",
"vies", "vies",
"yuppies", "yuppies",
"zombies" "zombies",
] ]
.iter() .iter()
.map(|s| s.to_string()) .map(|s| s.to_string())
@ -1701,7 +1699,7 @@ fn si_sb_oes_oe_case() -> Vec<String> {
"Roscoes", "Roscoes",
"Tahoes", "Tahoes",
"Tippecanoes", "Tippecanoes",
"Zoes" "Zoes",
] ]
.iter() .iter()
.map(|s| s.to_string()) .map(|s| s.to_string())
@ -1725,7 +1723,7 @@ fn si_sb_oes_oe() -> Vec<String> {
"throes", "throes",
"tiptoes", "tiptoes",
"toes", "toes",
"woes" "woes",
] ]
.iter() .iter()
.map(|s| s.to_string()) .map(|s| s.to_string())
@ -1754,7 +1752,7 @@ fn si_sb_ches_che_case() -> Vec<String> {
"Comanches", "Comanches",
"Nietzsches", "Nietzsches",
"Porsches", "Porsches",
"Roches" "Roches",
] ]
.iter() .iter()
.map(|s| s.to_string()) .map(|s| s.to_string())
@ -1782,7 +1780,7 @@ fn si_sb_ches_che() -> Vec<String> {
"quiches", "quiches",
"stomachaches", "stomachaches",
"toothaches", "toothaches",
"tranches" "tranches",
] ]
.iter() .iter()
.map(|s| s.to_string()) .map(|s| s.to_string())
@ -1804,7 +1802,14 @@ fn si_sb_sses_sse_case() -> Vec<String> {
} }
fn si_sb_sses_sse() -> Vec<String> { fn si_sb_sses_sse() -> Vec<String> {
return vec!["bouillabaisses", "crevasses", "demitasses", "impasses", "mousses", "posses"] return vec![
"bouillabaisses",
"crevasses",
"demitasses",
"impasses",
"mousses",
"posses",
]
.iter() .iter()
.map(|s| s.to_string()) .map(|s| s.to_string())
.collect(); .collect();
@ -1827,7 +1832,7 @@ fn si_sb_ves_ve() -> Vec<String> {
"resolves", "resolves",
"salves", "salves",
"twelves", "twelves",
"valves" "valves",
] ]
.iter() .iter()
.map(|s| s.to_string()) .map(|s| s.to_string())
@ -1843,14 +1848,17 @@ fn plverb_special_s() -> String {
concat.extend( concat.extend(
vec!["(.*[csx])is", "(.*)ceps", "[A-Z].*s"] vec!["(.*[csx])is", "(.*)ceps", "[A-Z].*s"]
.iter() .iter()
.map(|s| s.to_string()) .map(|s| s.to_string()),
); );
return enclose(&concat.join("|")); return enclose(&concat.join("|"));
} }
fn _pl_sb_postfix_adj_defn() -> HashMap<String, String> { fn _pl_sb_postfix_adj_defn() -> HashMap<String, String> {
let mut m = HashMap::new(); let mut m = HashMap::new();
m.insert("general".to_string(), enclose(r"(?!major|lieutenant|brigadier|adjutant|.*star)\S+")); m.insert(
"general".to_string(),
enclose(r"(?!major|lieutenant|brigadier|adjutant|.*star)\S+"),
);
m.insert("martial".to_string(), enclose("court")); m.insert("martial".to_string(), enclose("court"));
m.insert("force".to_string(), enclose("pound")); m.insert("force".to_string(), enclose("pound"));
m m
@ -1933,7 +1941,7 @@ fn si_sb_es_is() -> Vec<String> {
"theses", "theses",
"thromboses", "thromboses",
"tuberculoses", "tuberculoses",
"urinalyses" "urinalyses",
] ]
.iter() .iter()
.map(|s| s.to_string()) .map(|s| s.to_string())
@ -1942,46 +1950,10 @@ fn si_sb_es_is() -> Vec<String> {
fn pl_prep_list() -> Vec<String> { fn pl_prep_list() -> Vec<String> {
return vec![ return vec![
"about", "about", "above", "across", "after", "among", "around", "at", "athwart", "before",
"above", "behind", "below", "beneath", "beside", "besides", "between", "betwixt", "beyond", "but",
"across", "by", "during", "except", "for", "from", "in", "into", "near", "of", "off", "on", "onto",
"after", "out", "over", "since", "till", "to", "under", "until", "unto", "upon", "with",
"among",
"around",
"at",
"athwart",
"before",
"behind",
"below",
"beneath",
"beside",
"besides",
"between",
"betwixt",
"beyond",
"but",
"by",
"during",
"except",
"for",
"from",
"in",
"into",
"near",
"of",
"off",
"on",
"onto",
"out",
"over",
"since",
"till",
"to",
"under",
"until",
"unto",
"upon",
"with"
] ]
.iter() .iter()
.map(|s| s.to_string()) .map(|s| s.to_string())
@ -2015,7 +1987,7 @@ fn singular_pronoun_genders() -> Vec<String> {
"masculine", "masculine",
"gender-neutral", "gender-neutral",
"feminine or masculine", "feminine or masculine",
"masculine or feminine" "masculine or feminine",
] ]
.iter() .iter()
.map(|s| s.to_string()) .map(|s| s.to_string())
@ -2043,7 +2015,7 @@ fn pl_pron_nom() -> HashMap<String, String> {
("hers", "theirs"), ("hers", "theirs"),
("his", "theirs"), ("his", "theirs"),
("its", "theirs"), ("its", "theirs"),
("theirs", "theirs") ("theirs", "theirs"),
] ]
.iter() .iter()
.map(|&(k, v)| (k.to_string(), v.to_string())) .map(|&(k, v)| (k.to_string(), v.to_string()))
@ -2061,7 +2033,7 @@ fn pl_pron_acc() -> HashMap<String, String> {
("it", "them"), ("it", "them"),
("itself", "themselves"), ("itself", "themselves"),
("them", "them"), ("them", "them"),
("themself", "themselves") ("themself", "themselves"),
] ]
.iter() .iter()
.map(|&(k, v)| (k.to_string(), v.to_string())) .map(|&(k, v)| (k.to_string(), v.to_string()))
@ -2069,7 +2041,13 @@ fn pl_pron_acc() -> HashMap<String, String> {
} }
fn pl_pron_acc_keys() -> String { fn pl_pron_acc_keys() -> String {
return enclose(&pl_pron_acc().keys().cloned().collect::<Vec<String>>().join("|")); return enclose(
&pl_pron_acc()
.keys()
.cloned()
.collect::<Vec<String>>()
.join("|"),
);
} }
fn pl_pron_acc_keys_bysize() -> HashMap<usize, HashSet<String>> { fn pl_pron_acc_keys_bysize() -> HashMap<usize, HashSet<String>> {
@ -2088,8 +2066,18 @@ fn pron_tuples() -> Vec<(&'static str, &'static str, &'static str, &'static str)
("nom", "themselves", "feminine", "herself"), ("nom", "themselves", "feminine", "herself"),
("nom", "themselves", "masculine", "himself"), ("nom", "themselves", "masculine", "himself"),
("nom", "themselves", "gender-neutral", "themself"), ("nom", "themselves", "gender-neutral", "themself"),
("nom", "themselves", "feminine or masculine", "herself or himself"), (
("nom", "themselves", "masculine or feminine", "himself or herself"), "nom",
"themselves",
"feminine or masculine",
"herself or himself",
),
(
"nom",
"themselves",
"masculine or feminine",
"himself or herself",
),
("nom", "theirs", "neuter", "its"), ("nom", "theirs", "neuter", "its"),
("nom", "theirs", "feminine", "hers"), ("nom", "theirs", "feminine", "hers"),
("nom", "theirs", "masculine", "his"), ("nom", "theirs", "masculine", "his"),
@ -2106,8 +2094,18 @@ fn pron_tuples() -> Vec<(&'static str, &'static str, &'static str, &'static str)
("acc", "themselves", "feminine", "herself"), ("acc", "themselves", "feminine", "herself"),
("acc", "themselves", "masculine", "himself"), ("acc", "themselves", "masculine", "himself"),
("acc", "themselves", "gender-neutral", "themself"), ("acc", "themselves", "gender-neutral", "themself"),
("acc", "themselves", "feminine or masculine", "herself or himself"), (
("acc", "themselves", "masculine or feminine", "himself or herself") "acc",
"themselves",
"feminine or masculine",
"herself or himself",
),
(
"acc",
"themselves",
"masculine or feminine",
"himself or herself",
),
]; ];
} }
@ -2131,8 +2129,12 @@ fn si_pron() -> HashMap<String, HashMap<String, HashMap<String, String>>> {
for data in pron_tuples() { for data in pron_tuples() {
let (this_case, this_plur, this_gend, this_sing) = data; let (this_case, this_plur, this_gend, this_sing) = data;
let case = si_pron.entry(this_case.to_string()).or_insert_with(HashMap::new); let case = si_pron
let plur = case.entry(this_plur.to_string()).or_insert_with(HashMap::new); .entry(this_case.to_string())
.or_insert_with(HashMap::new);
let plur = case
.entry(this_plur.to_string())
.or_insert_with(HashMap::new);
plur.insert(this_gend.to_string(), this_sing.to_string()); plur.insert(this_gend.to_string(), this_sing.to_string());
} }
@ -2141,15 +2143,13 @@ fn si_pron() -> HashMap<String, HashMap<String, HashMap<String, String>>> {
pub fn get_si_pron(thecase: &str, word: &str, gender: Option<&str>) -> String { pub fn get_si_pron(thecase: &str, word: &str, gender: Option<&str>) -> String {
match si_pron().get(thecase) { match si_pron().get(thecase) {
Some(case) => Some(case) => match case.get(word) {
match case.get(word) { Some(sing) => match sing.get(gender.unwrap_or("N/A")) {
Some(sing) =>
match sing.get(gender.unwrap_or("N/A")) {
Some(specific) => specific.clone(), Some(specific) => specific.clone(),
None => sing.clone().values().next().unwrap().clone(), None => sing.clone().values().next().unwrap().clone(),
} },
None => panic!("No such case for word: {}", word), None => panic!("No such case for word: {}", word),
} },
None => panic!("No such case: {}", thecase), None => panic!("No such case: {}", thecase),
} }
} }
@ -2164,7 +2164,7 @@ fn plverb_irregular_pres() -> HashMap<String, String> {
("have", "have"), ("have", "have"),
("has", "have"), ("has", "have"),
("do", "do"), ("do", "do"),
("does", "do") ("does", "do"),
] ]
.iter() .iter()
.map(|&(k, v)| (k.to_string(), v.to_string())) .map(|&(k, v)| (k.to_string(), v.to_string()))
@ -2204,7 +2204,7 @@ fn plverb_ambiguous_pres() -> HashMap<String, String> {
("sleep", "sleep"), ("sleep", "sleep"),
("sleeps", "sleep"), ("sleeps", "sleep"),
("view", "view"), ("view", "view"),
("views", "view") ("views", "view"),
] ]
.iter() .iter()
.map(|&(k, v)| (k.to_string(), v.to_string())) .map(|&(k, v)| (k.to_string(), v.to_string()))
@ -2214,27 +2214,21 @@ fn plverb_ambiguous_pres() -> HashMap<String, String> {
fn plverb_ambiguous_pres_keys() -> Regex { fn plverb_ambiguous_pres_keys() -> Regex {
let pattern = format!( let pattern = format!(
r"^({})((\s.*)?)$", r"^({})((\s.*)?)$",
enclose(&plverb_ambiguous_pres().keys().cloned().collect::<Vec<String>>().join("|")) enclose(
&plverb_ambiguous_pres()
.keys()
.cloned()
.collect::<Vec<String>>()
.join("|")
)
); );
return Regex::new(&pattern).expect("Failed to compile regex"); return Regex::new(&pattern).expect("Failed to compile regex");
} }
fn plverb_irregular_non_pres() -> Vec<String> { fn plverb_irregular_non_pres() -> Vec<String> {
return vec![ return vec![
"did", "did", "had", "ate", "made", "put", "spent", "fought", "sank", "gave", "sought", "shall",
"had", "could", "ought", "should",
"ate",
"made",
"put",
"spent",
"fought",
"sank",
"gave",
"sought",
"shall",
"could",
"ought",
"should"
] ]
.iter() .iter()
.map(|s| s.to_string()) .map(|s| s.to_string())
@ -2254,17 +2248,11 @@ fn pl_v_oes_oe() -> Vec<String> {
} }
fn pl_v_oes_oe_endings_size4() -> Vec<String> { fn pl_v_oes_oe_endings_size4() -> Vec<String> {
return vec!["hoes", "toes"] return vec!["hoes", "toes"].iter().map(|s| s.to_string()).collect();
.iter()
.map(|s| s.to_string())
.collect();
} }
fn pl_v_oes_oe_endings_size5() -> Vec<String> { fn pl_v_oes_oe_endings_size5() -> Vec<String> {
return vec!["shoes"] return vec!["shoes"].iter().map(|s| s.to_string()).collect();
.iter()
.map(|s| s.to_string())
.collect();
} }
fn pl_count_zero() -> Vec<String> { fn pl_count_zero() -> Vec<String> {
@ -2282,7 +2270,12 @@ fn pl_count_one() -> Vec<String> {
} }
fn pl_adj_special() -> HashMap<String, String> { fn pl_adj_special() -> HashMap<String, String> {
return vec![("a", "some"), ("an", "some"), ("this", "these"), ("that", "those")] return vec![
("a", "some"),
("an", "some"),
("this", "these"),
("that", "those"),
]
.iter() .iter()
.map(|&(k, v)| (k.to_string(), v.to_string())) .map(|&(k, v)| (k.to_string(), v.to_string()))
.collect(); .collect();
@ -2291,7 +2284,13 @@ fn pl_adj_special() -> HashMap<String, String> {
fn pl_adj_special_keys() -> Regex { fn pl_adj_special_keys() -> Regex {
let pattern = format!( let pattern = format!(
r"^({})$", r"^({})$",
enclose(&pl_adj_special().keys().cloned().collect::<Vec<String>>().join("|")) enclose(
&pl_adj_special()
.keys()
.cloned()
.collect::<Vec<String>>()
.join("|")
)
); );
return Regex::new(&pattern).expect("Failed to compile regex"); return Regex::new(&pattern).expect("Failed to compile regex");
} }
@ -2303,7 +2302,7 @@ fn pl_adj_poss() -> HashMap<String, String> {
("its", "their"), ("its", "their"),
("her", "their"), ("her", "their"),
("his", "their"), ("his", "their"),
("their", "their") ("their", "their"),
] ]
.iter() .iter()
.map(|&(k, v)| (k.to_string(), v.to_string())) .map(|&(k, v)| (k.to_string(), v.to_string()))
@ -2313,7 +2312,13 @@ fn pl_adj_poss() -> HashMap<String, String> {
fn pl_adj_poss_keys() -> Regex { fn pl_adj_poss_keys() -> Regex {
let pattern = format!( let pattern = format!(
r"^({})$", r"^({})$",
enclose(&pl_adj_poss().keys().cloned().collect::<Vec<String>>().join("|")) enclose(
&pl_adj_poss()
.keys()
.cloned()
.collect::<Vec<String>>()
.join("|")
)
); );
return Regex::new(&pattern).expect("Failed to compile regex"); return Regex::new(&pattern).expect("Failed to compile regex");
} }
@ -2325,9 +2330,8 @@ fn a_abbrev() -> Regex {
} }
fn a_y_cons() -> Regex { fn a_y_cons() -> Regex {
return Regex::new(r"^(y(b[lor]|cl[ea]|fere|gg|p[ios]|rou|tt))").expect( return Regex::new(r"^(y(b[lor]|cl[ea]|fere|gg|p[ios]|rou|tt))")
"Failed to compile regex" .expect("Failed to compile regex");
);
} }
fn a_explicit_a() -> Regex { fn a_explicit_a() -> Regex {
@ -2335,9 +2339,8 @@ fn a_explicit_a() -> Regex {
} }
fn a_explicit_an() -> Regex { fn a_explicit_an() -> Regex {
return Regex::new(r"^((?:euler|hour(?!i)|heir|honest|hono[ur]|mpeg))").expect( return Regex::new(r"^((?:euler|hour(?!i)|heir|honest|hono[ur]|mpeg))")
"Failed to compile regex" .expect("Failed to compile regex");
);
} }
fn a_ordinal_a() -> Regex { fn a_ordinal_a() -> Regex {
@ -2362,7 +2365,7 @@ fn nth() -> HashMap<u32, String> {
(9, "th"), (9, "th"),
(11, "th"), (11, "th"),
(12, "th"), (12, "th"),
(13, "th") (13, "th"),
] ]
.iter() .iter()
.map(|&(k, v)| (k, v.to_string())) .map(|&(k, v)| (k, v.to_string()))
@ -2382,7 +2385,7 @@ fn ordinal() -> HashMap<String, String> {
("five", "fifth"), ("five", "fifth"),
("eight", "eighth"), ("eight", "eighth"),
("nine", "ninth"), ("nine", "ninth"),
("twelve", "twelfth") ("twelve", "twelfth"),
] ]
.iter() .iter()
.map(|&(k, v)| (k.to_string(), v.to_string())) .map(|&(k, v)| (k.to_string(), v.to_string()))
@ -2390,12 +2393,17 @@ fn ordinal() -> HashMap<String, String> {
} }
pub fn ordinal_suff() -> Regex { pub fn ordinal_suff() -> Regex {
let pattern = format!("({})", ordinal().keys().cloned().collect::<Vec<String>>().join("|")); let pattern = format!(
"({})",
ordinal().keys().cloned().collect::<Vec<String>>().join("|")
);
return Regex::new(&format!("{}\\z", pattern)).expect("Failed to compile regex"); return Regex::new(&format!("{}\\z", pattern)).expect("Failed to compile regex");
} }
fn unit() -> Vec<String> { fn unit() -> Vec<String> {
return vec!["", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine"] return vec![
"", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine",
]
.iter() .iter()
.map(|s| s.to_string()) .map(|s| s.to_string())
.collect(); .collect();
@ -2412,7 +2420,7 @@ fn teen() -> Vec<String> {
"sixteen", "sixteen",
"seventeen", "seventeen",
"eighteen", "eighteen",
"nineteen" "nineteen",
] ]
.iter() .iter()
.map(|s| s.to_string()) .map(|s| s.to_string())
@ -2421,16 +2429,7 @@ fn teen() -> Vec<String> {
fn ten() -> Vec<String> { fn ten() -> Vec<String> {
return vec![ return vec![
"", "", "", "twenty", "thirty", "forty", "fifty", "sixty", "seventy", "eighty", "ninety",
"",
"twenty",
"thirty",
"forty",
"fifty",
"sixty",
"seventy",
"eighty",
"ninety"
] ]
.iter() .iter()
.map(|s| s.to_string()) .map(|s| s.to_string())
@ -2450,7 +2449,7 @@ fn mill() -> Vec<String> {
" septillion", " septillion",
" octillion", " octillion",
" nonillion", " nonillion",
" decillion" " decillion",
] ]
.iter() .iter()
.map(|s| s.to_string()) .map(|s| s.to_string())
@ -2464,7 +2463,7 @@ fn def_classical() -> HashMap<String, bool> {
("herd", false), ("herd", false),
("names", true), ("names", true),
("persons", false), ("persons", false),
("ancient", false) ("ancient", false),
] ]
.iter() .iter()
.map(|&(k, v)| (k.to_string(), v)) .map(|&(k, v)| (k.to_string(), v))
@ -2493,7 +2492,9 @@ fn string_to_constant() -> HashMap<String, Option<bool>> {
.collect(); .collect();
} }
fn dollar_digits() -> Regex { Regex::new("\\$(\\d+)").expect("Failed to compile Regex") } fn dollar_digits() -> Regex {
Regex::new("\\$(\\d+)").expect("Failed to compile Regex")
}
// TODO: Pre-compiled REGEX objects, ln1950 @ og inflect // TODO: Pre-compiled REGEX objects, ln1950 @ og inflect
@ -2501,7 +2502,7 @@ pub struct Words {
pub lowered: String, pub lowered: String,
pub split_: Vec<String>, pub split_: Vec<String>,
pub first: String, pub first: String,
pub last: String pub last: String,
} }
impl Words { impl Words {
@ -2511,7 +2512,7 @@ impl Words {
lowered: s.to_lowercase(), lowered: s.to_lowercase(),
split_: split.clone(), split_: split.clone(),
first: split.get(0).cloned().unwrap_or_else(String::new), first: split.get(0).cloned().unwrap_or_else(String::new),
last: split.last().cloned().unwrap_or_else(String::new) last: split.last().cloned().unwrap_or_else(String::new),
} }
} }
} }
@ -2562,7 +2563,7 @@ impl Engine {
} }
} }
fn checkpat(self, pattern: Option<Word>) { fn checkpat(self, _pattern: Option<Word>) {
return; return;
} }
@ -2572,7 +2573,9 @@ impl Engine {
} }
} }
pub fn check_gender(&self) -> &String { &self.the_gender } pub fn check_gender(&self) -> &String {
&self.the_gender
}
pub fn get_count<T: Into<IntOrString>>(&self, count: Option<T>) -> i32 { pub fn get_count<T: Into<IntOrString>>(&self, count: Option<T>) -> i32 {
if count.is_none() { if count.is_none() {
@ -2587,8 +2590,10 @@ impl Engine {
match c { match c {
IntOrString::Int(n) => return n, IntOrString::Int(n) => return n,
IntOrString::Str(s) => { IntOrString::Str(s) => {
if pl_count_one().contains(&s) || if pl_count_one().contains(&s)
(*self.classical_dict.get("zero").unwrap_or(&false) && pl_count_zero().contains(&s.to_lowercase())) { || (*self.classical_dict.get("zero").unwrap_or(&false)
&& pl_count_zero().contains(&s.to_lowercase()))
{
return 1; return 1;
} else { } else {
return 2; return 2;
@ -2604,13 +2609,19 @@ enum IntOrString {
} }
impl From<i32> for IntOrString { impl From<i32> for IntOrString {
fn from(n: i32) -> Self { IntOrString::Int(n) } fn from(n: i32) -> Self {
IntOrString::Int(n)
}
} }
impl From<String> for IntOrString { impl From<String> for IntOrString {
fn from(s: String) -> Self { IntOrString::Str(s) } fn from(s: String) -> Self {
IntOrString::Str(s)
}
} }
impl From<&str> for IntOrString { impl From<&str> for IntOrString {
fn from(s: &str) -> Self { IntOrString::Str(s.to_string()) } fn from(s: &str) -> Self {
IntOrString::Str(s.to_string())
}
} }

View file

@ -10,18 +10,27 @@ fn test_joinstem() {
assert_eq!( assert_eq!(
joinstem( joinstem(
Some(-2), Some(-2),
Some(vec!["ephemeris".to_string(), "iris".to_string(), ".*itis".to_string()]) Some(vec![
"ephemeris".to_string(),
"iris".to_string(),
".*itis".to_string()
])
), ),
"(?:ephemer|ir|.*it)" "(?:ephemer|ir|.*it)"
); );
assert_eq!(joinstem(None, Some(vec!["ephemeris".to_string()])), "(?:ephemeris)"); assert_eq!(
joinstem(None, Some(vec!["ephemeris".to_string()])),
"(?:ephemeris)"
);
assert_eq!(joinstem(Some(5), None), "(?:)"); assert_eq!(joinstem(Some(5), None), "(?:)");
assert_eq!(joinstem(None, None), "(?:)"); assert_eq!(joinstem(None, None), "(?:)");
} }
#[test] #[test]
fn test_bysize() { fn test_bysize() {
let words = vec!["ant", "cat", "dog", "pig", "frog", "goat", "horse", "elephant"] let words = vec![
"ant", "cat", "dog", "pig", "frog", "goat", "horse", "elephant",
]
.iter() .iter()
.map(|s| s.to_string()) .map(|s| s.to_string())
.collect(); .collect();
@ -77,9 +86,6 @@ fn test_words() {
fn test_word() { fn test_word() {
let word = Word::new(String::from("fox")); let word = Word::new(String::from("fox"));
assert_eq!(word.unwrap().get(), "fox"); assert_eq!(word.unwrap().get(), "fox");
let word = Word::new(String::from(""));
word.unwrap();
} }
#[test] #[test]