diff --git a/src/aff.rs b/src/aff.rs index fc74ed8..50d8615 100644 --- a/src/aff.rs +++ b/src/aff.rs @@ -1,3 +1,5 @@ +pub(crate) mod parser; + use crate::{ alloc::{ borrow::Cow, @@ -73,7 +75,7 @@ impl fmt::Display for UnknownFlagTypeError { } } -#[derive(Debug)] +#[derive(Debug, PartialEq, Eq, Clone)] pub enum ParseFlagError { NonAscii(char), MissingSecondChar(char), @@ -98,118 +100,6 @@ impl fmt::Display for ParseFlagError { } } -fn try_flag_from_u16(val: u16) -> Result { - Flag::new(val).ok_or(ParseFlagError::ZeroFlag) -} - -fn try_flag_from_u32(val: u32) -> Result { - if val > u16::MAX as u32 { - return Err(ParseFlagError::FlagAbove65535); - } - try_flag_from_u16(val as u16) -} - -fn try_flag_from_char(ch: char) -> Result { - try_flag_from_u32(ch as u32) -} - -impl FlagType { - pub fn parse_flag_from_str(&self, input: &str) -> Result { - use ParseFlagError::*; - assert!(!input.is_empty()); - - match self { - Self::Short => { - let mut chars = input.chars(); - let ch = chars.next().expect("asserted to be non-empty above"); - if ch.is_ascii() { - // The flag is ASCII: it's a valid `u8` so it can fit into a `u16`. 
- try_flag_from_u16(ch as u16) - } else { - Err(NonAscii(ch)) - } - } - Self::Long => { - let mut chars = input.chars(); - let c1 = chars.next().expect("asserted to be non-empty above"); - if !c1.is_ascii() { - return Err(NonAscii(c1)); - } - let c2 = match chars.next() { - Some(ch) => ch, - None => return Err(MissingSecondChar(c1)), - }; - if !c2.is_ascii() { - return Err(NonAscii(c2)); - } - - try_flag_from_u16(u16::from_ne_bytes([c1 as u8, c2 as u8])) - } - Self::Numeric => { - let number = input.parse::().map_err(ParseIntError)?; - try_flag_from_u16(number) - } - Self::Utf8 => { - let mut chars = input.chars(); - let ch = chars.next().expect("asserted to be non-empty above"); - try_flag_from_char(ch) - } - } - } - - pub fn parse_flags_from_chars(&self, mut chars: Chars) -> Result { - use ParseFlagError::*; - - match self { - Self::Short => { - chars - .map(|ch| { - if ch.is_ascii() { - // The flag is ASCII: it's a valid `u8` so it can fit into a `u16`. - try_flag_from_u16(ch as u16) - } else { - Err(ParseFlagError::NonAscii(ch)) - } - }) - .collect() - } - Self::Long => { - let mut flags = FlagSet::new(); - while let Some(c1) = chars.next() { - let c2 = match chars.next() { - Some(ch) => ch, - None => return Err(MissingSecondChar(c1)), - }; - let flag = try_flag_from_u16(u16::from_ne_bytes([c1 as u8, c2 as u8]))?; - flags.insert(flag); - } - Ok(flags) - } - Self::Numeric => { - let mut flags = FlagSet::new(); - let mut number = String::new(); - let mut separated = false; - for ch in chars.by_ref() { - if ch.is_ascii_digit() { - number.push(ch); - } else { - if ch == ',' && separated { - return Err(DuplicateComma); - } - if ch == ',' { - separated = true; - let n = number.parse::().map_err(ParseIntError)?; - flags.insert(try_flag_from_u16(n)?); - } - } - } - Ok(flags) - } - Self::Utf8 => chars.map(try_flag_from_char).collect(), - } - } -} - #[derive(Debug, PartialEq, Eq, Clone)] pub(crate) struct Condition { /// The input pattern. 
@@ -375,18 +265,23 @@ impl FromStr for Condition { pub(crate) struct Affix { /// The flag that words may use to reference this affix. flag: Flag, - /// Whether the affix is compatible with the opposite affix. - /// For example a word that has both a prefix and a suffix, both the prefix - /// and suffix should have `crossproduct: true`. + /// Whether the affix is compatible with the opposite affix. For example a word that has both + /// a prefix and a suffix, both the prefix and suffix should have `crossproduct: true`. crossproduct: bool, /// What is stripped from the stem when the affix is applied. strip: Option, /// What should be added when the affix is applied. add: String, - /// Condition that the stem should be checked against to query if the - /// affix is relevant. - condition: Condition, - /// Flags the affix has itself. + /// Condition that the stem should be checked against to query if the affix is relevant. + /// + /// This is optional in Spellbook. Hunspell and Nuspell represent what we say is `None` as + /// `"."`. It's a pattern that always matches the input since the input to `condition_matches` + /// is never empty. + condition: Option, + /// Continuation flags. + /// + /// These are included with the `add` in `.aff` files (separated by `/`). + // TODO: document how they're used. 
flags: FlagSet, phantom_data: PhantomData, } @@ -397,16 +292,18 @@ impl Affix { crossproduct: bool, strip: Option<&str>, add: &str, - condition: &str, + condition: Option<&str>, flags: FlagSet, ) -> Result { + let condition = condition.map(str::parse).transpose()?; + Ok(Self { flag, crossproduct, strip: strip.map(|str| str.to_string()), add: add.to_string(), flags, - condition: condition.parse()?, + condition, phantom_data: PhantomData, }) } @@ -497,12 +394,17 @@ impl Prefix { } pub fn condition_matches(&self, word: &str) -> bool { + let condition = match self.condition.as_ref() { + Some(condition) => condition, + None => return true, + }; + // Length in bytes is greater than or equal to length in chars. - if word.len() < self.condition.chars { + if word.len() < condition.chars { return false; } - self.condition.matches(word) + condition.matches(word) } } @@ -553,8 +455,13 @@ impl Suffix { } pub fn condition_matches(&self, word: &str) -> bool { + let condition = match self.condition.as_ref() { + Some(condition) => condition, + None => return true, + }; + // Length in bytes is greater than or equal to length in chars. - if word.len() < self.condition.chars { + if word.len() < condition.chars { return false; } @@ -562,17 +469,17 @@ impl Suffix { let (chars, bytes) = word.chars() .rev() - .take(self.condition.chars) + .take(condition.chars) .fold((0, 0), |(chars, bytes), ch| { // TODO: convert to a u32 instead and check with bit math how many bytes // the code point takes. 
(chars + 1, bytes + ch.encode_utf8(buffer).len()) }); - if chars < self.condition.chars { + if chars < condition.chars { return false; } - self.condition.matches(&word[word.len() - bytes..]) + condition.matches(&word[word.len() - bytes..]) } } @@ -633,7 +540,13 @@ pub(crate) struct AffixIndex { impl FromIterator> for AffixIndex { fn from_iter>>(iter: T) -> Self { - let mut table: Vec<_> = iter.into_iter().collect(); + let table: Vec<_> = iter.into_iter().collect(); + table.into() + } +} + +impl From>> for AffixIndex { + fn from(mut table: Vec>) -> Self { // Sort the table lexiographically by key. We will use this lexiographical ordering to // efficiently search in AffixesIter. table.sort_unstable_by(|a, b| a.appending().cmp(b.appending())); @@ -779,7 +692,7 @@ impl<'a, K: AffixKind> Iterator for AffixesIter<'a, K> { /// // TODO: document how breaks are used and what the patterns mean. // TODO: use the Default implementation to give what Hunspell considers default? -#[derive(Debug, Default)] +#[derive(Debug)] pub(crate) struct BreakTable { table: Vec, start_word_breaks_last_idx: usize, @@ -788,6 +701,14 @@ pub(crate) struct BreakTable { middle_word_breaks_last_idx: usize, } +impl Default for BreakTable { + fn default() -> Self { + use crate::alloc::vec; + + vec!["^-", "-", "-$"].into() + } +} + impl From> for BreakTable { fn from(breaks: Vec<&str>) -> Self { let mut start = Vec::new(); @@ -944,29 +865,29 @@ pub(crate) struct AffData { prefixes: PrefixIndex, suffixes: SuffixIndex, break_table: BreakTable, - ignored_chars: String, - compound_rules: CompoundRuleTable, + // compound_rules: CompoundRuleTable, TODO: parsing compound_syllable_vowels: String, - compound_patterns: Vec, + // compound_patterns: Vec, TODO: parsing // input_substr_replacer: ? TODO // locale TODO // output_substr_replacer: ? 
TODO // suggestion options - // replacements: ReplacementTable, + // replacements: ReplacementTable, TODO // similarities: Vec, - // keyboard_closeness: String, - // try_chars: String, // phonetic_table: PhoneticTable, + ignore_chars: String, + keyboard_closeness: String, + try_chars: String, options: AffOptions, } -#[derive(Debug, Default)] +#[derive(Debug)] pub(crate) struct AffOptions { complex_prefixes: bool, fullstrip: bool, checksharps: bool, forbid_warn: bool, - compound_only_in_flag: Option, + only_in_compound_flag: Option, circumfix_flag: Option, forbidden_word_flag: Option, keep_case_flag: Option, @@ -981,24 +902,67 @@ pub(crate) struct AffOptions { compound_max_word_count: u16, compound_permit_flag: Option, compound_forbid_flag: Option, - compound_force_uppercase: Option, + compound_force_uppercase_flag: Option, compound_more_suffixes: bool, compound_check_duplicate: bool, compound_check_rep: bool, compound_check_case: bool, compound_check_triple: bool, + compound_simplified_triple: bool, compound_syllable_num: bool, compound_syllable_max: u16, max_compound_suggestions: u16, no_suggest_flag: Option, substandard_flag: Option, max_ngram_suggestions: u16, - max_diff_factor: Option, + max_diff_factor: u16, only_max_diff: bool, no_split_suggestions: bool, suggest_with_dots: bool, } +impl Default for AffOptions { + fn default() -> Self { + Self { + complex_prefixes: Default::default(), + fullstrip: Default::default(), + checksharps: Default::default(), + forbid_warn: Default::default(), + only_in_compound_flag: Default::default(), + circumfix_flag: Default::default(), + forbidden_word_flag: Default::default(), + keep_case_flag: Default::default(), + need_affix_flag: Default::default(), + warn_flag: Default::default(), + compound_flag: Default::default(), + compound_begin_flag: Default::default(), + compound_middle_flag: Default::default(), + compound_last_flag: Default::default(), + compound_min_length: Default::default(), + compound_max_word_count: 
Default::default(), + compound_permit_flag: Default::default(), + compound_forbid_flag: Default::default(), + compound_force_uppercase_flag: Default::default(), + compound_more_suffixes: Default::default(), + compound_check_duplicate: Default::default(), + compound_check_rep: Default::default(), + compound_check_case: Default::default(), + compound_check_triple: Default::default(), + compound_simplified_triple: Default::default(), + compound_syllable_num: Default::default(), + compound_syllable_max: Default::default(), + max_compound_suggestions: 3, + no_suggest_flag: Default::default(), + substandard_flag: Default::default(), + max_ngram_suggestions: 5, + max_diff_factor: 5, + only_max_diff: Default::default(), + no_split_suggestions: Default::default(), + suggest_with_dots: Default::default(), + } + } +} + #[cfg(test)] mod test { use super::*; @@ -1097,7 +1061,7 @@ mod test { fn affix_index_prefix_multiset_nuspell_unit_test() { // Upstream: fn prefix(add: &str) -> Prefix { - Prefix::new(Flag::new(1).unwrap(), true, None, add, ".", flagset![]).unwrap() + Prefix::new(Flag::new(1).unwrap(), true, None, add, None, flagset![]).unwrap() } let index: PrefixIndex = [ @@ -1119,7 +1083,7 @@ mod test { fn affix_index_suffix_multiset_nuspell_unit_test() { // Upstream: fn suffix(add: &str) -> Suffix { - Suffix::new(Flag::new(1).unwrap(), true, None, add, ".", flagset![]).unwrap() + Suffix::new(Flag::new(1).unwrap(), true, None, add, None, flagset![]).unwrap() } let index: SuffixIndex = [ @@ -1150,10 +1114,11 @@ mod test { // SFX D 0 ed [^ey] // SFX D 0 ed [aeiou]y let flag = Flag::new('D' as u16).unwrap(); - let suffix1 = Suffix::new(flag, true, None, "d", "e", flagset![]).unwrap(); - let suffix2 = Suffix::new(flag, true, Some("y"), "ied", "[^aeiou]y", flagset![]).unwrap(); - let suffix3 = Suffix::new(flag, true, None, "ed", "[^ey]", flagset![]).unwrap(); - let suffix4 = Suffix::new(flag, true, None, "ed", "[aeiou]y", flagset![]).unwrap(); + let suffix1 = Suffix::new(flag, 
true, None, "d", Some("e"), flagset![]).unwrap(); + let suffix2 = + Suffix::new(flag, true, Some("y"), "ied", Some("[^aeiou]y"), flagset![]).unwrap(); + let suffix3 = Suffix::new(flag, true, None, "ed", Some("[^ey]"), flagset![]).unwrap(); + let suffix4 = Suffix::new(flag, true, None, "ed", Some("[aeiou]y"), flagset![]).unwrap(); let index: SuffixIndex = [&suffix1, &suffix2, &suffix3, &suffix4] .into_iter() diff --git a/src/aff/parser.rs b/src/aff/parser.rs new file mode 100644 index 0000000..ed613f5 --- /dev/null +++ b/src/aff/parser.rs @@ -0,0 +1,1074 @@ +//! The parser for `.aff` and `.dic` files. +//! +//! This parser takes inspiration from Zspell: +//! and Nuspell. The Rust bits are mostly Zspell: folding over a const slice of parser functions. +//! Rather than trying to parse directly into the `Aff_Data` type, Nuspell has a sort of +//! scratch-pad data structure for the initial parsing. This works well for types like +//! `AffixIndex` which are most efficient to build all-at-once from all elements rather than +//! constructively by inserting each element. + +use core::{ + hash::BuildHasher, + iter::{Enumerate, Peekable, TakeWhile}, + str::{Chars, SplitWhitespace}, +}; + +use hashbrown::HashMap; + +use crate::{ + alloc::{ + string::{String, ToString}, + vec::Vec, + }, + ParseDictionaryErrorSource, WordList, +}; + +use crate::{Flag, FlagSet, ParseDictionaryError, ParseDictionaryErrorKind}; + +use super::{AffData, AffOptions, FlagType, ParseFlagError, Prefix, Suffix}; + +type Result = core::result::Result; +type ParseResult = Result<()>; + +#[derive(Debug, Default)] +struct AffLineParser<'aff> { + options: AffOptions, + // options only used for parsing: + flag_type: FlagType, + // encoding: Encoding, + flag_aliases: Vec, + // wordchars: String, deprecated? 
+ replacements: Vec<(&'aff str, &'aff str)>, + break_patterns: Vec<&'aff str>, + compound_syllable_vowels: &'aff str, + ignore_chars: &'aff str, + try_chars: &'aff str, + keyboard_closeness: &'aff str, + prefixes: Vec, + suffixes: Vec, +} + +type Parser = for<'aff> fn(&mut AffLineParser<'aff>, &mut Lines<'aff>) -> ParseResult; + +// These parsers are only used for the `.aff` file's contents. The `.dic` file is handled ad-hoc. +const AFF_PARSERS: [(&str, Parser); 44] = [ + ("FLAG", parse_flag_type), + // Flags + ("FORBIDDENWORD", parse_forbidden_word_flag), + ("CIRCUMFIX", parse_circumfix_flag), + ("KEEPCASE", parse_keep_case_flag), + ("NEEDAFFIX", parse_need_affix_flag), + ("NOSUGGEST", parse_no_suggest_flag), + ("SUBSTANDARD", parse_substandard_flag), + ("WARN", parse_warn_flag), + ("COMPOUNDFLAG", parse_compound_flag), + ("COMPOUNDBEGIN", parse_compound_begin_flag), + ("COMPOUNDMIDDLE", parse_compound_middle_flag), + ("COMPOUNDLAST", parse_compound_last_flag), + ("ONLYINCOMPOUND", parse_only_in_compound_flag), + ("COMPOUNDPERMITFLAG", parse_compound_permit_flag), + ("COMPOUNDFORBIDFLAG", parse_compound_forbid_flag), + ("FORCEUCASE", parse_compound_force_uppercase_flag), + // Bools + ("COMPLEXPREFIXES", parse_complex_prefixes), + ("FULLSTRIP", parse_fullstrip), + ("CHECKSHARPS", parse_checksharps), + ("FORBIDWARN", parse_forbid_warn), + ("COMPOUNDMORESUFFIXES", parse_compound_more_suffixes), + ("CHECKCOMPOUNDDUP", parse_compound_check_duplicate), + ("CHECKCOMPOUNDREP", parse_compound_check_rep), + ("CHECKCOMPOUNDCASE", parse_compound_check_case), + ("CHECKCOMPOUNDTRIPLE", parse_compound_check_triple), + ("SIMPLIFIEDTRIPLE", parse_compound_simplified_triple), + ("SYLLABLENUM", parse_compound_syllable_num), + ("ONLYMAXDIFF", parse_only_max_diff), + ("NOSPLITSUGS", parse_no_split_suggestions), + ("SUGSWITHDOTS", parse_suggest_with_dots), + // "Shorts" as Nuspell calls them (u16s in Spellbook) + ("COMPOUNDMIN", parse_compound_min_length), + ("COMPOUNDWORDMAX", 
parse_compound_max_word_count), + ("MAXCPDSUGS", parse_max_compound_suggestions), + ("MAXNGRAMSUGS", parse_max_ngram_suggestions), + ("MAXDIFF", parse_max_diff_factor), + // Strings + ("IGNORE", parse_ignore_chars), + ("KEY", parse_keyboard_closeness), + ("TRY", parse_try_chars), + // String pairs + // TODO: phonetic replacements, input & output conversion + ("REP", parse_replacements), + // Remaining complicated structures + ("BREAK", parse_break_patterns), + ("COMPOUNDSYLLABLE", parse_compound_syllable), + ("AF", parse_flag_aliases), + ("PFX", parse_prefix_table), + ("SFX", parse_suffix_table), + // TODO: + // ("CHECKCOMPOUNDPATTERN", parse_compound_pattern), + // ("COMPOUNDRULE", parse_compound_rule), +]; + +// TODO: encoding? Or just require all dictionaries to be UTF-8? +// TODO: drop dependency on Default for S and allow passing it in. +pub(crate) fn parse<'dic, 'aff, S: BuildHasher + Default>( + dic_text: &'dic str, + aff_text: &'aff str, +) -> Result> { + // First parse the aff file. + let mut lines = Lines::<'aff>::new(aff_text, ParseDictionaryErrorSource::Aff); + let aff_parsers = HashMap::<&str, Parser, S>::from_iter(AFF_PARSERS.into_iter()); + let mut cx = AffLineParser::<'aff>::default(); + + while !lines.is_finished() { + if let Some(parser) = lines.next_word().and_then(|key| aff_parsers.get(key)) { + (parser)(&mut cx, &mut lines)?; + } + + lines.advance_line() + } + + // Then parse the dic file into a WordList. + let mut lines = Lines::<'dic>::new(dic_text, ParseDictionaryErrorSource::Dic); + let row_count = lines + .take_exactly_one_word()? + .parse::() + .map_err(|err| lines.error(ParseDictionaryErrorKind::MalformedNumber(err)))?; + let mut words = WordList::::with_capacity(row_count); + + for row in 1..=row_count { + lines.advance_line(); + if lines.is_finished() { + return Err(lines.error(ParseDictionaryErrorKind::MismatchedRowCount { + expected: row_count, + actual: row, + })); + } + + // NOTE: currently we ignore morphological fields. 
+ let word = match lines.next_word() { + Some(word) => word, + // Empty lines are skipped. + None => continue, + }; + let (word, flagset) = parse_dic_line(word, cx.flag_type, &cx.flag_aliases, cx.ignore_chars) + .map_err(|err| lines.error(ParseDictionaryErrorKind::MalformedFlag(err)))?; + words.insert(word, flagset); + } + + // Collect everything into AffData. + Ok(AffData { + words, + prefixes: cx.prefixes.into(), + suffixes: cx.suffixes.into(), + break_table: cx.break_patterns.into(), + // compound_rules: todo!(), + compound_syllable_vowels: cx.compound_syllable_vowels.to_string(), + // compound_patterns: todo!(), + ignore_chars: cx.ignore_chars.to_string(), + keyboard_closeness: cx.keyboard_closeness.to_string(), + try_chars: cx.try_chars.to_string(), + options: cx.options, + }) +} + +fn parse_flag_type(cx: &mut AffLineParser, lines: &mut Lines) -> ParseResult { + let word = lines.take_exactly_one_word()?; + + word.parse::() + .map(|flag_type| cx.flag_type = flag_type) + .map_err(|err| lines.error(ParseDictionaryErrorKind::UnknownFlagType(err))) +} + +fn parse_forbidden_word_flag(cx: &mut AffLineParser, lines: &mut Lines) -> ParseResult { + lines + .parse_flag(cx) + .map(|flag| cx.options.forbidden_word_flag = Some(flag)) +} + +fn parse_circumfix_flag(cx: &mut AffLineParser, lines: &mut Lines) -> ParseResult { + lines + .parse_flag(cx) + .map(|flag| cx.options.circumfix_flag = Some(flag)) +} + +fn parse_keep_case_flag(cx: &mut AffLineParser, lines: &mut Lines) -> ParseResult { + lines + .parse_flag(cx) + .map(|flag| cx.options.keep_case_flag = Some(flag)) +} + +fn parse_need_affix_flag(cx: &mut AffLineParser, lines: &mut Lines) -> ParseResult { + lines + .parse_flag(cx) + .map(|flag| cx.options.need_affix_flag = Some(flag)) +} + +fn parse_no_suggest_flag(cx: &mut AffLineParser, lines: &mut Lines) -> ParseResult { + lines + .parse_flag(cx) + .map(|flag| cx.options.no_suggest_flag = Some(flag)) +} + +fn parse_substandard_flag(cx: &mut AffLineParser, lines: &mut 
Lines) -> ParseResult { + lines + .parse_flag(cx) + .map(|flag| cx.options.substandard_flag = Some(flag)) +} + +fn parse_warn_flag(cx: &mut AffLineParser, lines: &mut Lines) -> ParseResult { + lines + .parse_flag(cx) + .map(|flag| cx.options.warn_flag = Some(flag)) +} + +fn parse_compound_flag(cx: &mut AffLineParser, lines: &mut Lines) -> ParseResult { + lines + .parse_flag(cx) + .map(|flag| cx.options.compound_flag = Some(flag)) +} + +fn parse_compound_begin_flag(cx: &mut AffLineParser, lines: &mut Lines) -> ParseResult { + lines + .parse_flag(cx) + .map(|flag| cx.options.compound_begin_flag = Some(flag)) +} + +fn parse_compound_middle_flag(cx: &mut AffLineParser, lines: &mut Lines) -> ParseResult { + lines + .parse_flag(cx) + .map(|flag| cx.options.compound_middle_flag = Some(flag)) +} + +fn parse_compound_last_flag(cx: &mut AffLineParser, lines: &mut Lines) -> ParseResult { + lines + .parse_flag(cx) + .map(|flag| cx.options.compound_last_flag = Some(flag)) +} + +fn parse_only_in_compound_flag(cx: &mut AffLineParser, lines: &mut Lines) -> ParseResult { + lines + .parse_flag(cx) + .map(|flag| cx.options.only_in_compound_flag = Some(flag)) +} + +fn parse_compound_permit_flag(cx: &mut AffLineParser, lines: &mut Lines) -> ParseResult { + lines + .parse_flag(cx) + .map(|flag| cx.options.compound_permit_flag = Some(flag)) +} + +fn parse_compound_forbid_flag(cx: &mut AffLineParser, lines: &mut Lines) -> ParseResult { + lines + .parse_flag(cx) + .map(|flag| cx.options.compound_forbid_flag = Some(flag)) +} + +fn parse_compound_force_uppercase_flag(cx: &mut AffLineParser, lines: &mut Lines) -> ParseResult { + lines + .parse_flag(cx) + .map(|flag| cx.options.compound_force_uppercase_flag = Some(flag)) +} + +fn parse_complex_prefixes(cx: &mut AffLineParser, lines: &mut Lines) -> ParseResult { + lines.parse_bool().map(|b| cx.options.complex_prefixes = b) +} + +fn parse_fullstrip(cx: &mut AffLineParser, lines: &mut Lines) -> ParseResult { + lines.parse_bool().map(|b| 
cx.options.fullstrip = b) +} + +fn parse_checksharps(cx: &mut AffLineParser, lines: &mut Lines) -> ParseResult { + lines.parse_bool().map(|b| cx.options.checksharps = b) +} + +fn parse_forbid_warn(cx: &mut AffLineParser, lines: &mut Lines) -> ParseResult { + lines.parse_bool().map(|b| cx.options.forbid_warn = b) +} + +fn parse_compound_more_suffixes(cx: &mut AffLineParser, lines: &mut Lines) -> ParseResult { + lines + .parse_bool() + .map(|b| cx.options.compound_more_suffixes = b) +} + +fn parse_compound_check_duplicate(cx: &mut AffLineParser, lines: &mut Lines) -> ParseResult { + lines + .parse_bool() + .map(|b| cx.options.compound_check_duplicate = b) +} + +fn parse_compound_check_rep(cx: &mut AffLineParser, lines: &mut Lines) -> ParseResult { + lines + .parse_bool() + .map(|b| cx.options.compound_check_rep = b) +} + +fn parse_compound_check_case(cx: &mut AffLineParser, lines: &mut Lines) -> ParseResult { + lines + .parse_bool() + .map(|b| cx.options.compound_check_case = b) +} + +fn parse_compound_check_triple(cx: &mut AffLineParser, lines: &mut Lines) -> ParseResult { + lines + .parse_bool() + .map(|b| cx.options.compound_check_triple = b) +} + +fn parse_compound_simplified_triple(cx: &mut AffLineParser, lines: &mut Lines) -> ParseResult { + lines + .parse_bool() + .map(|b| cx.options.compound_simplified_triple = b) +} + +fn parse_compound_syllable_num(cx: &mut AffLineParser, lines: &mut Lines) -> ParseResult { + lines + .parse_bool() + .map(|b| cx.options.compound_syllable_num = b) +} + +fn parse_only_max_diff(cx: &mut AffLineParser, lines: &mut Lines) -> ParseResult { + lines.parse_bool().map(|b| cx.options.only_max_diff = b) +} + +fn parse_no_split_suggestions(cx: &mut AffLineParser, lines: &mut Lines) -> ParseResult { + lines + .parse_bool() + .map(|b| cx.options.no_split_suggestions = b) +} + +fn parse_suggest_with_dots(cx: &mut AffLineParser, lines: &mut Lines) -> ParseResult { + lines.parse_bool().map(|b| cx.options.suggest_with_dots = b) +} + +fn 
parse_compound_min_length(cx: &mut AffLineParser, lines: &mut Lines) -> ParseResult {
+    // A minimum compound length of zero is meaningless, so raise the parsed value to at
+    // least 1. `max` sets a lower bound; `min` would instead cap every value at 1.
+    lines
+        .parse_short()
+        .map(|s| cx.options.compound_min_length = s.max(1))
+}
+
+/// Handles `COMPOUNDWORDMAX`: stores the parsed number in `options.compound_max_word_count`.
+fn parse_compound_max_word_count(cx: &mut AffLineParser, lines: &mut Lines) -> ParseResult {
+    lines
+        .parse_short()
+        .map(|s| cx.options.compound_max_word_count = s)
+}
+
+/// Handles `MAXCPDSUGS`: stores the parsed number in `options.max_compound_suggestions`.
+fn parse_max_compound_suggestions(cx: &mut AffLineParser, lines: &mut Lines) -> ParseResult {
+    lines
+        .parse_short()
+        .map(|s| cx.options.max_compound_suggestions = s)
+}
+
+/// Handles `MAXNGRAMSUGS`: stores the parsed number in `options.max_ngram_suggestions`.
+fn parse_max_ngram_suggestions(cx: &mut AffLineParser, lines: &mut Lines) -> ParseResult {
+    lines
+        .parse_short()
+        .map(|s| cx.options.max_ngram_suggestions = s)
+}
+
+/// Handles `MAXDIFF`: stores the parsed number in `options.max_diff_factor`, replacing any
+/// value above 10 with 5.
+fn parse_max_diff_factor(cx: &mut AffLineParser, lines: &mut Lines) -> ParseResult {
+    let s = lines.parse_short()?;
+    cx.options.max_diff_factor = if s > 10 { 5 } else { s };
+    Ok(())
+}
+
+/// Handles `IGNORE`: borrows the next word on the line as `ignore_chars`.
+fn parse_ignore_chars<'a>(cx: &mut AffLineParser<'a>, lines: &mut Lines<'a>) -> ParseResult {
+    lines
+        .take_exactly_one_word()
+        .map(|word| cx.ignore_chars = word)
+}
+
+/// Handles `KEY`: borrows the next word on the line as `keyboard_closeness`.
+fn parse_keyboard_closeness<'a>(cx: &mut AffLineParser<'a>, lines: &mut Lines<'a>) -> ParseResult {
+    lines
+        .take_exactly_one_word()
+        .map(|word| cx.keyboard_closeness = word)
+}
+
+/// Handles `TRY`: borrows the next word on the line as `try_chars`.
+fn parse_try_chars<'a>(cx: &mut AffLineParser<'a>, lines: &mut Lines<'a>) -> ParseResult {
+    lines
+        .take_exactly_one_word()
+        .map(|word| cx.try_chars = word)
+}
+
+/// Handles the `REP` table: appends each row's pair of strings to `replacements`.
+fn parse_replacements<'aff>(cx: &mut AffLineParser<'aff>, lines: &mut Lines<'aff>) -> ParseResult {
+    lines.parse_table2("REP", |str1, str2| {
+        cx.replacements.push((str1, str2));
+        Ok(())
+    })
+}
+
+/// Handles the `BREAK` table: appends each row's pattern to `break_patterns`.
+fn parse_break_patterns<'aff>(
+    cx: &mut AffLineParser<'aff>,
+    lines: &mut Lines<'aff>,
+) -> ParseResult {
+    lines.parse_table1("BREAK", |str| {
+        cx.break_patterns.push(str);
+        Ok(())
+    })
+}
+
+fn parse_compound_syllable<'aff>(
+    cx: &mut AffLineParser<'aff>,
+    lines: &mut Lines<'aff>,
+) -> ParseResult {
+    // Takes the shape COMPOUNDSYLLABLE
+    let mut words = match
lines.words.take() { + Some(words) => words, + None => { + return Err(lines.error(ParseDictionaryErrorKind::MismatchedArity { + expected: 2, + actual: 0, + })) + } + }; + + let max = match words.next() { + Some(word) => word, + None => { + return Err(lines.error(ParseDictionaryErrorKind::MismatchedArity { + expected: 2, + actual: 0, + })) + } + }; + let vowels = match words.next() { + Some(word) => word, + None => { + return Err(lines.error(ParseDictionaryErrorKind::MismatchedArity { + expected: 2, + actual: 1, + })) + } + }; + let remaining_words = words.count(); + if remaining_words > 0 { + return Err(lines.error(ParseDictionaryErrorKind::MismatchedArity { + expected: 2, + actual: 2 + remaining_words, + })); + } + + cx.options.compound_syllable_max = max + .parse::() + .map_err(|err| lines.error(ParseDictionaryErrorKind::MalformedNumber(err)))?; + cx.compound_syllable_vowels = vowels; + + Ok(()) +} + +fn parse_flag_aliases(cx: &mut AffLineParser, lines: &mut Lines) -> ParseResult { + lines.parse_table1("AF", |alias| { + let flagset = parse_flags_from_chars(cx.flag_type, alias.chars())?; + cx.flag_aliases.push(flagset); + Ok(()) + }) +} + +fn parse_prefix_table(cx: &mut AffLineParser, lines: &mut Lines) -> ParseResult { + lines.parse_affix_table( + "PFX", + cx.flag_type, + |flag, crossproduct, strip, add, condition, flagset_str| { + let flagset = decode_flagset(flagset_str, cx.flag_type, &cx.flag_aliases)?; + let prefix = Prefix::new(flag, crossproduct, strip, add, condition, flagset)?; + cx.prefixes.push(prefix); + Ok(()) + }, + ) +} + +fn parse_suffix_table(cx: &mut AffLineParser, lines: &mut Lines) -> ParseResult { + lines.parse_affix_table( + "SFX", + cx.flag_type, + |flag, crossproduct, strip, add, condition, flagset_str| { + let flagset = decode_flagset(flagset_str, cx.flag_type, &cx.flag_aliases)?; + let suffix = Suffix::new(flag, crossproduct, strip, add, condition, flagset)?; + cx.suffixes.push(suffix); + Ok(()) + }, + ) +} + +/// A helper type that means 
"words on a line split by whitespace with comments +/// dropped." This is a concretion of `impl Iterator`. +type Words<'text> = TakeWhile, for<'b, 'c> fn(&'b &'c str) -> bool>; + +struct Lines<'text> { + lines: Peekable>>, + words: Option>, + source: ParseDictionaryErrorSource, +} + +impl<'text> Lines<'text> { + fn new(text: &'text str, source: ParseDictionaryErrorSource) -> Self { + let mut lines = text.lines().enumerate().peekable(); + let words = lines.peek().map(|(_line_no, line)| { + line.split_whitespace() + .take_while((|word| !word.starts_with('#')) as for<'b, 'c> fn(&'b &'c str) -> bool) + }); + + Self { + lines, + words, + source, + } + } + + fn is_finished(&mut self) -> bool { + self.lines.peek().is_none() + } + + fn advance_line(&mut self) { + self.lines.next(); + self.words = self.lines.peek().map(|(_line_no, line)| { + line.split_whitespace() + .take_while((|word| !word.starts_with('#')) as for<'b, 'c> fn(&'b &'c str) -> bool) + }); + } + + fn next_word(&mut self) -> Option<&str> { + let mut words = self.words.take()?; + let word = words.next()?; + self.words = Some(words); + Some(word) + } + + fn take_exactly_one_word(&mut self) -> Result<&'text str> { + let mut words = self.words.take().ok_or_else(|| { + self.error(ParseDictionaryErrorKind::MismatchedArity { + expected: 1, + actual: 0, + }) + })?; + let word = words.next().ok_or_else(|| { + self.error(ParseDictionaryErrorKind::MismatchedArity { + expected: 1, + actual: 0, + }) + })?; + self.words = Some(words); + Ok(word) + } + + fn parse_flag(&mut self, cx: &AffLineParser) -> Result { + let word = self.take_exactly_one_word()?; + parse_flag_from_str(cx.flag_type, word) + .map_err(|err| self.error(ParseDictionaryErrorKind::MalformedFlag(err))) + } + + fn parse_bool(&mut self) -> Result { + // Boolean flags are specified by just the key. For example if you see `COMPLEXPREFIXES` + // as a line, `complex_prefixes` is true. Otherwise it's false. 
+ let count = self + .words + .take() + .map(|words| words.count()) + .unwrap_or_default(); + if count > 0 { + return Err(self.error(ParseDictionaryErrorKind::MismatchedArity { + expected: 0, + actual: count, + })); + } + Ok(true) + } + + fn parse_short(&mut self) -> Result { + let word = self.take_exactly_one_word()?; + word.parse::() + .map_err(|err| self.error(ParseDictionaryErrorKind::MalformedNumber(err))) + } + + fn parse_table1(&mut self, key: &str, mut f: F) -> ParseResult + where + F: FnMut(&'text str) -> core::result::Result<(), ParseDictionaryErrorKind>, + { + let row_count = self + .take_exactly_one_word()? + .parse::() + .map_err(|err| self.error(ParseDictionaryErrorKind::MalformedNumber(err)))?; + + for row in 1..=row_count { + self.advance_line(); + if self.is_finished() || self.next_word() != Some(key) { + return Err(self.error(ParseDictionaryErrorKind::MismatchedRowCount { + expected: row_count, + actual: row, + })); + } + + let mut words = match self.words.take() { + Some(words) => words, + None => { + return Err(self.error(ParseDictionaryErrorKind::MismatchedArity { + expected: 1, + actual: 0, + })) + } + }; + + let word = match words.next() { + Some(word) => word, + None => { + return Err(self.error(ParseDictionaryErrorKind::MismatchedArity { + expected: 1, + actual: 0, + })) + } + }; + let remaining_words = words.count(); + if remaining_words > 0 { + return Err(self.error(ParseDictionaryErrorKind::MismatchedArity { + expected: 1, + actual: 1 + remaining_words, + })); + } + + f(word).map_err(|kind| self.error(kind))?; + } + + Ok(()) + } + + fn parse_table2(&mut self, key: &str, mut f: F) -> ParseResult + where + F: FnMut(&'text str, &'text str) -> core::result::Result<(), ParseDictionaryErrorKind>, + { + let row_count = self + .take_exactly_one_word()? 
+ .parse::() + .map_err(|err| self.error(ParseDictionaryErrorKind::MalformedNumber(err)))?; + + for row in 1..=row_count { + self.advance_line(); + if self.is_finished() || self.next_word() != Some(key) { + return Err(self.error(ParseDictionaryErrorKind::MismatchedRowCount { + expected: row_count, + actual: row, + })); + } + + let mut words = match self.words.take() { + Some(words) => words, + None => { + return Err(self.error(ParseDictionaryErrorKind::MismatchedArity { + expected: 2, + actual: 0, + })) + } + }; + + let word1 = match words.next() { + Some(word) => word, + None => { + return Err(self.error(ParseDictionaryErrorKind::MismatchedArity { + expected: 2, + actual: 0, + })) + } + }; + let word2 = match words.next() { + Some(word) => word, + None => { + return Err(self.error(ParseDictionaryErrorKind::MismatchedArity { + expected: 2, + actual: 1, + })) + } + }; + let remaining_words = words.count(); + if remaining_words > 0 { + return Err(self.error(ParseDictionaryErrorKind::MismatchedArity { + expected: 2, + actual: 2 + remaining_words, + })); + } + + f(word1, word2).map_err(|kind| self.error(kind))?; + } + + Ok(()) + } + + fn parse_affix_table(&mut self, key: &str, flag_type: FlagType, mut f: F) -> ParseResult + where + F: FnMut( + Flag, // flag + bool, // crossproduct + Option<&str>, // strip + &str, // add + Option<&str>, // condition + &str, // flagset + ) -> core::result::Result<(), ParseDictionaryErrorKind>, + { + // Header takes the shapes: + // PFX flag cross_product row_count + // SFX flag cross_product row_count + let mut words = match self.words.take() { + Some(words) => words, + None => { + return Err(self.error(ParseDictionaryErrorKind::MismatchedArity { + expected: 3, + actual: 0, + })) + } + }; + + let flag_str = match words.next() { + Some(word) => word, + None => { + return Err(self.error(ParseDictionaryErrorKind::MismatchedArity { + expected: 3, + actual: 0, + })) + } + }; + let flag = parse_flag_from_str(flag_type, flag_str) + 
.map_err(|err| self.error(ParseDictionaryErrorKind::MalformedFlag(err)))?; + + let crossproduct = match words.next() { + Some("Y") => true, + Some("N") => true, + Some(_) => return Err(self.error(ParseDictionaryErrorKind::MalformedAffix)), + None => { + return Err(self.error(ParseDictionaryErrorKind::MismatchedArity { + expected: 3, + actual: 1, + })) + } + }; + + let row_count = match words.next() { + Some(word) => word + .parse::() + .map_err(|err| self.error(ParseDictionaryErrorKind::MalformedNumber(err)))?, + None => { + return Err(self.error(ParseDictionaryErrorKind::MismatchedArity { + expected: 3, + actual: 2, + })) + } + }; + + let remaining_words = words.count(); + if remaining_words > 0 { + return Err(self.error(ParseDictionaryErrorKind::MismatchedArity { + expected: 3, + actual: 3 + remaining_words, + })); + } + + for row in 1..=row_count { + // Each row takes the shape: + // PFX flag stripping prefix [condition [morphological_fields...]] + // SFX flag stripping suffix [condition [morphological_fields...]] + self.advance_line(); + if self.is_finished() || self.next_word() != Some(key) { + return Err(self.error(ParseDictionaryErrorKind::MismatchedRowCount { + expected: row_count, + actual: row, + })); + } + + let mut words = match self.words.take() { + Some(words) => words, + None => { + return Err(self.error(ParseDictionaryErrorKind::MismatchedArity { + expected: 3, + actual: 0, + })) + } + }; + + if words.next() != Some(flag_str) { + return Err(self.error(ParseDictionaryErrorKind::MalformedAffix)); + } + + let strip = match words.next() { + Some("0") => None, + Some(word) => Some(word), + None => { + return Err(self.error(ParseDictionaryErrorKind::MismatchedArity { + expected: 3, + actual: 1, + })) + } + }; + + // the add needs to be split into flagset. 
+ let (add, flagset) = match words.next() { + Some(word) => split_word_and_flagset_naive(word), + None => { + return Err(self.error(ParseDictionaryErrorKind::MismatchedArity { + expected: 3, + actual: 2, + })) + } + }; + + // "." is the empty condition - it always matches. We'll use an Option for this + // fast lane instead. + let condition = words.next().filter(|&cond| cond != "."); + + // NOTE: we don't check the remaining words on the line because morphological fields + // are allowed after the condition and we don't currently parse those. + + f(flag, crossproduct, strip, add, condition, flagset) + .map_err(|kind| self.error(kind))? + } + + Ok(()) + } + + fn error(&mut self, kind: ParseDictionaryErrorKind) -> ParseDictionaryError { + ParseDictionaryError { + kind, + source: self.source, + line_number: self + .lines + .peek() + .map(|(line_number, _line)| line_number) + .copied(), + } + } +} + +fn try_flag_from_u16(val: u16) -> core::result::Result { + Flag::new(val).ok_or(ParseFlagError::ZeroFlag) +} + +fn try_flag_from_u32(val: u32) -> core::result::Result { + if val > u16::MAX as u32 { + return Err(ParseFlagError::FlagAbove65535); + } + try_flag_from_u16(val as u16) +} + +fn try_flag_from_char(ch: char) -> core::result::Result { + try_flag_from_u32(ch as u32) +} + +fn parse_flag_from_str( + flag_type: FlagType, + input: &str, +) -> core::result::Result { + use ParseFlagError::*; + assert!(!input.is_empty()); + + match flag_type { + FlagType::Short => { + let mut chars = input.chars(); + let ch = chars.next().expect("asserted to be non-empty above"); + if ch.is_ascii() { + // The flag is ASCII: it's a valid `u8` so it can fit into a `u16`. 
+ try_flag_from_u16(ch as u16) + } else { + Err(NonAscii(ch)) + } + } + FlagType::Long => { + let mut chars = input.chars(); + let c1 = chars.next().expect("asserted to be non-empty above"); + if !c1.is_ascii() { + return Err(NonAscii(c1)); + } + let c2 = match chars.next() { + Some(ch) => ch, + None => return Err(MissingSecondChar(c1)), + }; + if !c2.is_ascii() { + return Err(NonAscii(c2)); + } + + try_flag_from_u16(u16::from_ne_bytes([c1 as u8, c2 as u8])) + } + FlagType::Numeric => { + let number = input.parse::().map_err(ParseIntError)?; + try_flag_from_u16(number) + } + FlagType::Utf8 => { + let mut chars = input.chars(); + let ch = chars.next().expect("asserted to be non-empty above"); + try_flag_from_char(ch) + } + } +} + +fn parse_flags_from_chars( + flag_type: FlagType, + mut chars: Chars, +) -> core::result::Result { + use ParseFlagError::*; + + match flag_type { + FlagType::Short => { + chars + .map(|ch| { + if ch.is_ascii() { + // The flag is ASCII: it's a valid `u8` so it can fit into a `u16`. 
+ try_flag_from_u16(ch as u16) + } else { + Err(ParseFlagError::NonAscii(ch)) + } + }) + .collect() + } + FlagType::Long => { + let mut flags = FlagSet::new(); + while let Some(c1) = chars.next() { + let c2 = match chars.next() { + Some(ch) => ch, + None => return Err(MissingSecondChar(c1)), + }; + let flag = try_flag_from_u16(u16::from_ne_bytes([c1 as u8, c2 as u8]))?; + flags.insert(flag); + } + Ok(flags) + } + FlagType::Numeric => { + let mut flags = FlagSet::new(); + let mut number = String::new(); + let mut separated = false; + for ch in chars.by_ref() { + if ch.is_ascii_digit() { + number.push(ch); + } else { + if ch == ',' && separated { + return Err(DuplicateComma); + } + if ch == ',' { + separated = true; + let n = number.parse::().map_err(ParseIntError)?; + flags.insert(try_flag_from_u16(n)?); + } + } + } + if !number.is_empty() { + let n = number.parse::().map_err(ParseIntError)?; + flags.insert(try_flag_from_u16(n)?); + } + Ok(flags) + } + FlagType::Utf8 => chars.map(try_flag_from_char).collect(), + } +} + +/// Input is assumed to be a single word, i.e. not containing whitespace. +/// This only splits on the slash, it doesn't handle escaping. +// NOTE: in pratice no dictionary uses escaping for affix continuation flags. +fn split_word_and_flagset_naive(input: &str) -> (&str, &str) { + input.split_once('/').unwrap_or((input, "")) +} + +/// Attempt to look up the flagset as an alias. +fn decode_flagset( + input: &str, + flag_type: FlagType, + aliases: &[FlagSet], +) -> core::result::Result { + // Fast lane for numeric flag-types and empty aliases. + if matches!(flag_type, FlagType::Numeric) || aliases.is_empty() { + // TODO: refactor this function to take a str + return parse_flags_from_chars(flag_type, input.chars()); + } + + if let Some(index) = input + .parse::() + .ok() + .map(|i| i as usize) + .filter(|&i| i > 0 && i <= aliases.len()) + { + // NOTE: the aliases are 1-indexed. 
+ Ok(aliases[index - 1].clone()) + } else { + parse_flags_from_chars(flag_type, input.chars()) + } +} + +fn parse_dic_line( + input: &str, + flag_type: FlagType, + aliases: &[FlagSet], + ignore_chars: &str, +) -> core::result::Result<(String, FlagSet), ParseFlagError> { + let mut chars = input.chars(); + let mut word = String::new(); + let mut escape = false; + for ch in chars.by_ref() { + match ch { + '\\' => escape = !escape, + '/' if !escape => break, + _ => word.push(ch), + } + } + if !ignore_chars.is_empty() { + todo!("erase ignored characters from the word") + } + let flags_str: String = chars.collect(); + let flag_set = decode_flagset(&flags_str, flag_type, aliases)?; + + Ok((word, flag_set)) +} + +#[cfg(test)] +mod test { + use crate::{flag, flagset}; + + use super::*; + + #[test] + fn naive_word_flagset_split_test() { + assert_eq!( + ("word", "flags"), + split_word_and_flagset_naive("word/flags") + ); + assert_eq!(("word", ""), split_word_and_flagset_naive("word")); + assert_eq!(("", ""), split_word_and_flagset_naive("")); + } + + #[test] + fn parse_flagset_test() { + assert_eq!( + Ok(flag!('a' as u16)), + parse_flag_from_str(FlagType::Short, "a") + ); + assert_eq!(Ok(flag!(1)), parse_flag_from_str(FlagType::Numeric, "1")); + + assert_eq!( + Ok(flagset![1]), + parse_flags_from_chars(FlagType::Numeric, "1".chars()) + ); + } + + #[test] + fn decode_flagset_test() { + let aliases = &[flagset![1], flagset![2], flagset![3, 4]]; + + // NOTE: 1-indexing. 
+ assert_eq!( + flagset![1], + decode_flagset("1", FlagType::default(), aliases).unwrap() + ); + assert_eq!( + flagset![2], + decode_flagset("2", FlagType::default(), aliases).unwrap() + ); + assert_eq!( + flagset![3, 4], + decode_flagset("3", FlagType::default(), aliases).unwrap() + ); + assert_eq!( + flagset!['a' as u16], + decode_flagset("a", FlagType::default(), aliases).unwrap() + ); + + assert_eq!( + flagset![1], + decode_flagset("1", FlagType::Numeric, aliases).unwrap() + ); + assert_eq!( + flagset!['1' as u16], + decode_flagset("1", FlagType::default(), &[]).unwrap() + ); + } +} diff --git a/src/lib.rs b/src/lib.rs index bdc40fb..d642b5d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -212,6 +212,105 @@ impl fmt::Debug for FlagSet { pub(crate) type WordList = hash_multi_map::HashMultiMap; +#[derive(Debug)] +pub struct ParseDictionaryError { + pub kind: ParseDictionaryErrorKind, + pub source: ParseDictionaryErrorSource, + pub line_number: Option, +} + +impl fmt::Display for ParseDictionaryError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self.line_number { + Some(line) => write!( + f, + "failed to parse {} file on line {}: {}", + self.source, line, self.kind + ), + None => write!(f, "failed to parse {} file: {}", self.source, self.kind), + } + } +} + +#[derive(Debug, Clone, Copy)] +pub enum ParseDictionaryErrorSource { + Dic, + Aff, + // Personal, ? 
+} + +impl fmt::Display for ParseDictionaryErrorSource { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Dic => write!(f, ".dic"), + Self::Aff => write!(f, ".aff"), + } + } +} + +#[derive(Debug)] +pub enum ParseDictionaryErrorKind { + UnknownFlagType(aff::UnknownFlagTypeError), + MalformedFlag(aff::ParseFlagError), + MalformedNumber(core::num::ParseIntError), + UnexpectedNonWhitespace(char), + MismatchedArity { expected: usize, actual: usize }, + MismatchedRowCount { expected: usize, actual: usize }, + // MalformedCompoundRule(ParseCompoundRuleError), + // MalformedMorphologicalField(String), + MalformedAffix, + MalformedCondition(aff::ConditionError), + Empty, +} + +impl From for ParseDictionaryErrorKind { + fn from(err: aff::UnknownFlagTypeError) -> Self { + Self::UnknownFlagType(err) + } +} + +impl From for ParseDictionaryErrorKind { + fn from(err: aff::ParseFlagError) -> Self { + Self::MalformedFlag(err) + } +} + +impl From for ParseDictionaryErrorKind { + fn from(err: aff::ConditionError) -> Self { + Self::MalformedCondition(err) + } +} + +impl fmt::Display for ParseDictionaryErrorKind { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::UnknownFlagType(err) => err.fmt(f), + Self::MalformedFlag(err) => { + write!(f, "flag is malformed: {}", err) + } + Self::MalformedNumber(err) => err.fmt(f), + Self::UnexpectedNonWhitespace(ch) => { + write!(f, "unexpected non-whitespace character '{}'", ch) + } + Self::MismatchedArity { expected, actual } => { + write!(f, "expected {} arguments but found {}", expected, actual) + } + Self::MismatchedRowCount { expected, actual } => { + write!(f, "expected {} rows but found {}", expected, actual) + } + // Self::MalformedCompoundRule(err) => { + // write!(f, "compound rule is malformed: {}", err) + // } + // Self::MalformedMorphologicalField(s) => { + // write!(f, "morphological field '{}' is malformed", s) + // } + Self::MalformedAffix => write!(f, "failed to 
parse affix"), + Self::MalformedCondition(err) => write!(f, "condition is malformed: {}", err), + Self::Empty => write!(f, "the file is empty"), + } + } +} + #[cfg(test)] mod test { use super::*;