use std::collections::{HashMap, HashSet};

/// Encloses a string `s` in a non-capturing regex group, i.e. `(?:s)`.
pub fn enclose(s: &str) -> String {
    format!("(?:{})", s)
}

/// Returns the prefix of `word` selected by `cutpoint`.
///
/// * `None` keeps the whole word.
/// * A negative cutpoint `-n` drops the last `n` bytes.
/// * A non-negative cutpoint `n` keeps the first `n` bytes.
///
/// The cut is clamped to the word length, so a word shorter than the cut
/// yields `""` (negative cutpoint) or the whole word (positive cutpoint)
/// instead of panicking.
///
/// # Panics
///
/// Panics if the resulting byte offset is not on a UTF-8 character boundary.
fn slice_to_cutpoint(word: &str, cutpoint: Option<i32>) -> &str {
    let end = match cutpoint {
        None => return word,
        // `unsigned_abs` avoids the overflow that `-c` would hit for `i32::MIN`.
        Some(c) if c < 0 => word.len().saturating_sub(c.unsigned_abs() as usize),
        Some(c) => word.len().min(c.unsigned_abs() as usize),
    };
    &word[..end]
}

/// Converts a slice of string literals into a vector of owned strings.
fn to_string_vec(words: &[&str]) -> Vec<String> {
    words.iter().map(|w| (*w).to_owned()).collect()
}

/// Inverts a singular -> plural table into a plural -> singular table.
///
/// A plural value may list several alternatives separated by `|`; every
/// alternative becomes its own key pointing at the singular form.
fn invert_plural_map(
    plurals: HashMap<&'static str, &'static str>,
) -> HashMap<&'static str, &'static str> {
    let mut singulars = HashMap::new();
    for (singular, forms) in plurals {
        for plural in forms.split('|') {
            singulars.insert(plural, singular);
        }
    }
    singulars
}

/// Joins the stem of each word in `words` into a string for Regex.
///
/// Each word is cut according to `cutpoint` (see the rules below), the stems
/// are joined with `|`, and the result is wrapped by [`enclose`].
///
/// * `cutpoint == None` uses each word unchanged.
/// * `cutpoint == Some(-n)` drops the last `n` bytes of each word.
/// * `cutpoint == Some(n)` keeps the first `n` bytes of each word.
///
/// `words == None` is treated as an empty list and yields `"(?:)"`.
///
/// # Panics
///
/// Panics if a cut falls inside a multi-byte UTF-8 character.
pub fn joinstem(cutpoint: Option<i32>, words: Option<Vec<String>>) -> String {
    let words = words.unwrap_or_default();
    let stem = words
        .iter()
        .map(|w| slice_to_cutpoint(w, cutpoint))
        .collect::<Vec<&str>>()
        .join("|");
    enclose(&stem)
}

/// From a list of words, returns a HashMap of HashSets of words, keyed by word length.
///
/// The length used as key is the byte length reported by `String::len`.
pub fn bysize(words: Vec<String>) -> HashMap<usize, HashSet<String>> {
    let mut res: HashMap<usize, HashSet<String>> = HashMap::new();
    for word in words {
        // The word is already owned, so it is moved into the set without cloning.
        res.entry(word.len()).or_default().insert(word);
    }
    res
}

/// Builds the derived lists for a family of words sharing an ending.
///
/// For every word in `list`, the last `si_ending_size` bytes are removed
/// (nothing is removed when it is `None`) and `pl_ending` is appended.
///
/// Returns, in order:
/// 1. the list of rewritten words,
/// 2. the rewritten words grouped by length (see [`bysize`]),
/// 3. the input words grouped by length,
/// 4. when `do_joinstem` is true, the [`joinstem`] alternation of the input
///    words with the ending removed; otherwise an empty string.
///
/// # Panics
///
/// Panics if a cut falls inside a multi-byte UTF-8 character.
pub fn make_pl_si_lists(
    list: Vec<String>,
    pl_ending: &str,
    si_ending_size: Option<i32>,
    do_joinstem: bool,
) -> (
    Vec<String>,
    HashMap<usize, HashSet<String>>,
    HashMap<usize, HashSet<String>>,
    String,
) {
    // A negative cutpoint means "drop this many bytes from the end".
    let cutpoint = si_ending_size.map(i32::saturating_neg);

    let si_list: Vec<String> = list
        .iter()
        .map(|w| format!("{}{}", slice_to_cutpoint(w, cutpoint), pl_ending))
        .collect();
    let si_bysize = bysize(si_list.clone());

    let stem = if do_joinstem {
        joinstem(cutpoint, Some(list.clone()))
    } else {
        String::new()
    };
    let pl_bysize = bysize(list);

    (si_list, si_bysize, pl_bysize, stem)
}

/// Irregular plurals of nouns ending in `s`; alternatives are separated by `|`.
fn pl_sb_irregular_s() -> HashMap<&'static str, &'static str> {
    [
        ("corpus", "corpuses|corpora"),
        ("opus", "opuses|opera"),
        ("genus", "genera"),
        ("mythos", "mythoi"),
        ("penis", "penises|penes"),
        ("testis", "testes"),
        ("atlas", "atlases|atlantes"),
        ("yes", "yeses"),
    ]
    .iter()
    .copied()
    .collect()
}

/// Irregular noun plurals (singular -> plural), including the entries of
/// [`pl_sb_irregular_s`]; alternatives are separated by `|`.
fn pl_sb_irregular() -> HashMap<&'static str, &'static str> {
    let mut table: HashMap<&'static str, &'static str> = [
        ("child", "children"),
        ("chili", "chilis|chilies"),
        ("brother", "brothers|brethren"),
        ("infinity", "infinities|infinity"),
        ("loaf", "loaves"),
        ("lore", "lores|lore"),
        ("hoof", "hoofs|hooves"),
        ("beef", "beefs|beeves"),
        ("thief", "thiefs|thieves"),
        ("money", "monies"),
        ("mongoose", "mongooses"),
        ("ox", "oxen"),
        ("cow", "cows|kine"),
        ("graffito", "graffiti"),
        ("octopus", "octopuses|octopodes"),
        ("genie", "genies|genii"),
        ("ganglion", "ganglions|ganglia"),
        ("trilby", "trilbys"),
        ("turf", "turfs|turves"),
        ("numen", "numina"),
        ("atman", "atmas"),
        ("occiput", "occiputs|occipita"),
        ("sabretooth", "sabretooths"),
        ("sabertooth", "sabertooths"),
        ("lowlife", "lowlifes"),
        ("flatfoot", "flatfoots"),
        ("tenderfoot", "tenderfoots"),
        ("romany", "romanies"),
        ("jerry", "jerries"),
        ("mary", "maries"),
        ("talouse", "talouses"),
        ("rom", "roma"),
        ("carmen", "carmina"),
    ]
    .iter()
    .copied()
    .collect();
    table.extend(pl_sb_irregular_s());
    table
}

/// Irregular plurals of capitalised nouns (singular -> plural).
fn pl_sb_irregular_caps() -> HashMap<&'static str, &'static str> {
    [
        ("Romany", "Romanies"),
        ("Jerry", "Jerrys"),
        ("Mary", "Marys"),
        ("Rom", "Roma"),
    ]
    .iter()
    .copied()
    .collect()
}

/// Irregular plurals of compound nouns (singular -> plural); alternatives are
/// separated by `|`.
fn pl_sb_irregular_compound() -> HashMap<&'static str, &'static str> {
    [("prima donna", "prima donnas|prime donne")]
        .iter()
        .copied()
        .collect()
}

/// Inverse of [`pl_sb_irregular`]: every plural alternative maps to its singular.
fn si_sb_irregular() -> HashMap<&'static str, &'static str> {
    invert_plural_map(pl_sb_irregular())
}

/// Inverse of [`pl_sb_irregular_caps`]: plural -> singular.
fn si_sb_irregular_caps() -> HashMap<&'static str, &'static str> {
    invert_plural_map(pl_sb_irregular_caps())
}

/// Inverse of [`pl_sb_irregular_compound`]: every plural alternative maps to
/// its singular.
fn si_sb_irregular_compound() -> HashMap<&'static str, &'static str> {
    invert_plural_map(pl_sb_irregular_compound())
}

/// Words ending in `z` listed for the `z` -> `zes` rule.
fn pl_sb_z_zes_list() -> Vec<String> {
    to_string_vec(&["quartz", "topaz"])
}

/// [`pl_sb_z_zes_list`] grouped by word length.
fn pl_sb_z_zes_bysize() -> HashMap<usize, HashSet<String>> {
    bysize(pl_sb_z_zes_list())
}

/// Words ending in `ze` listed for the `ze` -> `zes` rule.
fn sb_ze_zes_list() -> Vec<String> {
    to_string_vec(&["snooze"])
}

/// [`sb_ze_zes_list`] grouped by word length.
fn sb_ze_zes_bysize() -> HashMap<usize, HashSet<String>> {
    bysize(sb_ze_zes_list())
}

/// Complete words of the `is` -> `ides` family.
fn pl_sb_c_is_ides_complete() -> Vec<String> {
    to_string_vec(&["ephemeris", "iris", "clitoris", "chrysalis", "epididymis"])
}

/// Word endings of the `is` -> `ides` family.
fn pl_sb_c_is_ides_endings() -> Vec<String> {
    to_string_vec(&["itis"])
}

/// Regex alternation of the `is` -> `ides` stems.
///
/// Complete words are used as-is, endings are prefixed with `.*`, and the
/// trailing two bytes (`is`) are removed from each before joining.
fn pl_sb_c_is_ides() -> String {
    let words: Vec<String> = pl_sb_c_is_ides_complete()
        .into_iter()
        .chain(
            pl_sb_c_is_ides_endings()
                .into_iter()
                .map(|w| format!(".*{}", w)),
        )
        .collect();
    joinstem(Some(-2), Some(words))
}

/// Complete words followed by endings of the `is` -> `ides` family.
fn pl_sb_c_is_ides_list() -> Vec<String> {
    let mut list = pl_sb_c_is_ides_complete();
    list.extend(pl_sb_c_is_ides_endings());
    list
}

/// The `is` -> `ides` family with `is` replaced by `ides`.
fn si_sb_c_is_ides_list() -> Vec<String> {
    // The stem is not needed here, so it is not computed.
    make_pl_si_lists(pl_sb_c_is_ides_list(), "ides", Some(2), false).0
}

/// [`si_sb_c_is_ides_list`] grouped by word length.
fn si_sb_c_is_ides_bysize() -> HashMap<usize, HashSet<String>> {
    make_pl_si_lists(pl_sb_c_is_ides_list(), "ides", Some(2), false).1
}

/// [`pl_sb_c_is_ides_list`] grouped by word length.
fn pl_sb_c_is_ides_bysize() -> HashMap<usize, HashSet<String>> {
    make_pl_si_lists(pl_sb_c_is_ides_list(), "ides", Some(2), false).2
}

/// Words of the `a` -> `ata` family.
fn pl_sb_c_a_ata_list() -> Vec<String> {
    to_string_vec(&[
        "anathema",
        "bema",
        "carcinoma",
        "charisma",
        "diploma",
        "dogma",
        "drama",
        "edema",
        "enema",
        "enigma",
        "lemma",
        "lymphoma",
        "magma",
        "melisma",
        "miasma",
        "oedema",
        "sarcoma",
        "schema",
        "soma",
        "stigma",
        "stoma",
        "trauma",
        "gumma",
        "pragma",
    ])
}

/// The `a` -> `ata` family with the final `a` replaced by `ata`.
fn si_sb_c_a_ata_list() -> Vec<String> {
    make_pl_si_lists(pl_sb_c_a_ata_list(), "ata", Some(1), false).0
}

/// [`si_sb_c_a_ata_list`] grouped by word length.
fn si_sb_c_a_ata_bysize() -> HashMap<usize, HashSet<String>> {
    make_pl_si_lists(pl_sb_c_a_ata_list(), "ata", Some(1), false).1
}

/// [`pl_sb_c_a_ata_list`] grouped by word length.
fn pl_sb_c_a_ata_bysize() -> HashMap<usize, HashSet<String>> {
    make_pl_si_lists(pl_sb_c_a_ata_list(), "ata", Some(1), false).2
}

/// Regex alternation of the `a` -> `ata` stems (each word without its final `a`).
fn pl_sb_c_a_ata() -> String {
    // The stem is only produced when `do_joinstem` is true.
    make_pl_si_lists(pl_sb_c_a_ata_list(), "ata", Some(1), true).3
}