diff --git a/Cargo.lock b/Cargo.lock index 14f2413..a5acae1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,17 @@ # It is not intended for manual editing. version = 3 +[[package]] +name = "ahash" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47" +dependencies = [ + "getrandom", + "once_cell", + "version_check", +] + [[package]] name = "aho-corasick" version = "0.7.18" @@ -42,7 +53,7 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "cskk" -version = "0.10.3" +version = "0.11.0" dependencies = [ "anyhow", "bitflags", @@ -52,6 +63,8 @@ dependencies = [ "env_logger", "lazy_static", "log", + "lru", + "nom", "regex", "sequence_trie", "serde", @@ -135,6 +148,15 @@ dependencies = [ "wasi", ] +[[package]] +name = "hashbrown" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" +dependencies = [ + "ahash", +] + [[package]] name = "hermit-abi" version = "0.1.19" @@ -171,6 +193,15 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "lru" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6e8aaa3f231bb4bd57b84b2d5dc3ae7f350265df8aa96492e0bc394a1571909" +dependencies = [ + "hashbrown", +] + [[package]] name = "memchr" version = "2.5.0" @@ -186,6 +217,28 @@ dependencies = [ "libc", ] +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + +[[package]] +name = "nom" +version = "7.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8903e5a29a317527874d0402f867152a3d21c908bb0b933e416c65e301d4c36" +dependencies = [ + "memchr", + "minimal-lexical", +] + +[[package]] +name = "once_cell" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e82dad04139b71a90c080c8463fe0dc7902db5192d939bd0950f074d014339e1" + [[package]] name = "proc-macro2" version = "1.0.40" @@ -325,6 +378,12 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "15c61ba63f9235225a22310255a29b806b907c9b8c964bcbd0a2c70f3f2deea7" +[[package]] +name = "version_check" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" + [[package]] name = "wasi" version = "0.11.0+wasi-snapshot-preview1" diff --git a/Cargo.toml b/Cargo.toml index 0f6d516..bfc5151 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "cskk" # Have to also change the deb package destination filename below -version = "0.10.3" +version = "0.11.0" authors = ["Naoaki Iwakiri "] license = "GPL-3.0-or-later" edition = "2018" @@ -33,6 +33,8 @@ regex = "^1.5" lazy_static = "1.4" xdg = { version = "^2.4" } anyhow = { version = "^1.0.65" } +nom = "^7.1" +lru = "^0.8.1" [dev-dependencies] env_logger = "^0.9.0" diff --git a/README.en.md b/README.en.md index 7f10a7c..8ec4d0f 100644 --- a/README.en.md +++ b/README.en.md @@ -72,7 +72,7 @@ To install to non-standard directories, append following options like this. See - [x] Basic 漢字変換 - [x] static dictionary - [x] user dictionary - - not ddskk compatible + - ddskk compatible since v0.11.0 - [ ] 接頭辞・接尾辞変換 - [x] 数値変換 - [x] auto-start-henkan diff --git a/README.md b/README.md index bcbb6b6..f979c49 100644 --- a/README.md +++ b/README.md @@ -79,7 +79,7 @@ root権限がある場合、以下を実行する。 - [x] Basic 漢字変換 - [x] static dictionary - [x] user dictionary - - not ddskk compatible + - ddskk compatible since v0.11.0 - [ ] 接頭辞・接尾辞変換 - [x] 数値変換 - [x] auto-start-henkan diff --git a/src/candidate_list.rs b/src/candidate_list.rs index 0609c2d..65cbdbc 100644 --- a/src/candidate_list.rs +++ b/src/candidate_list.rs @@ -1,10 +1,11 @@ use crate::dictionary::candidate::Candidate; +use crate::dictionary::CompositeKey; use crate::error::CskkError; #[derive(Debug)] pub(crate) struct CandidateList { // 現在保持している選択肢の元 - to_composite: String, + to_composite: CompositeKey, // 現在のカーソル位置 selection_cursor_position: usize, // 変換中の選択肢 @@ -14,13 +15,13 @@ pub(crate) struct CandidateList { impl CandidateList { pub(crate) fn new() -> Self { CandidateList { - to_composite: "".to_string(), + to_composite: CompositeKey::new("", None), selection_cursor_position: 0, composition_candidates: vec![], } } - pub(crate) fn set(&mut self, raw_to_composite: String, candidates: Vec) { + pub(crate) fn set(&mut self, raw_to_composite: CompositeKey, candidates: Vec) { self.to_composite = raw_to_composite; self.composition_candidates = candidates; self.selection_cursor_position = 0; @@ -41,7 +42,7 @@ impl CandidateList { } } - pub(crate) fn get_current_to_composite(&self) -> &str { + pub(crate) fn get_current_to_composite(&self) -> &CompositeKey { &self.to_composite } diff --git a/src/cskkstate.rs b/src/cskkstate.rs index 04c0950..932a40a 100644 --- a/src/cskkstate.rs +++ b/src/cskkstate.rs @@ -1,5 +1,6 @@ use crate::candidate_list::CandidateList; use crate::dictionary::candidate::Candidate; +use crate::dictionary::CompositeKey; use crate::form_changer::KanaFormChanger; use crate::skk_modes::{CompositionMode, InputMode}; use std::fmt::{Debug, Formatter}; @@ -23,7 +24,7 @@ pub(crate) struct CskkState { raw_to_composite: String, // 未確定入力の漢字の読み部分。主にひらがな、Abbrev等の時は英字もありうる。出力時にInputModeにあわせて変換される。 converted_kana_to_composite: String, - // 未確定入力の漢字の読み以外の部分。多くの場合送り仮名であり、その想定のもとに変数名を付けてしまったが、auto_start_henkan等の強制的に変換を開始する場合にはおくりがな以外のpostfixが入ることもある。convertがあるInputMode時のみ使用 + // 未確定入力の漢字の読み以外の部分。多くの場合送り仮名であり、その想定のもとに変数名を付けてしまったが、auto_start_henkan等の強制的に変換を開始する場合にはおくりがな以外のpostfixが入ることもある。 converted_kana_to_okuri: String, // 現在の変換候補リスト candidate_list: CandidateList, @@ -33,7 +34,7 @@ pub(crate) struct CskkState { confirmed: String, // 今のかな変換の間に大文字でモード変更をしたかどうか。このステートによってシフトを押したままキー入力をしてしまった時に連続してモード変更しないようにしている。 capital_transition: bool, - // 現在送り仮名を入力しているかどうか。postfixを送り仮名として用いるべきかどうか。 + // 現在送り仮名を入力しているかどうか。converted_kana_to_okuriを送り仮名として用いるべきかどうか。 use_okurigana: bool, } @@ -276,22 +277,15 @@ impl CskkState { } /// 今のステートで変換する時の辞書のキーとして使うべき文字列を返す。 - pub(crate) fn get_composite_key(&self) -> String { - // ローマ字ベースではない入力規則に対応するため、送り仮名の最初の文字はひらがなから対応表を引く。 - if self.use_okurigana { - // ひらがなはUnicode Scalar Valueなのでchars()で十分。 - if let Some(first_kana) = self.converted_kana_to_okuri.chars().next() { - if let Some(okuri_first) = - KanaFormChanger::kana_to_okuri_prefix(&first_kana.to_string()) - { - let mut string = self.raw_to_composite.to_owned(); - string.push_str(okuri_first); - return string; - } - } + pub(crate) fn get_composite_key(&self) -> CompositeKey { + if self.use_okurigana && !self.converted_kana_to_okuri.is_empty() { + return CompositeKey::new( + &self.raw_to_composite, + Some(self.converted_kana_to_okuri.to_owned()), + ); } - self.raw_to_composite.to_owned() + CompositeKey::new(&self.raw_to_composite, None) } pub(crate) fn set_capital_transition(&mut self, has_transitioned: bool) { @@ -339,8 +333,8 @@ impl CskkState { /// 現在の変換候補を設定し、最初の候補を指す pub(crate) fn set_new_candidate_list(&mut self, candidates: Vec) { - let raw_to_composite = self.get_composite_key(); - self.candidate_list.set(raw_to_composite, candidates); + let composite_key = self.get_composite_key(); + self.candidate_list.set(composite_key, candidates); self.composited_okuri = self.converted_kana_to_okuri.to_string(); } } diff --git a/src/dictionary/candidate.rs b/src/dictionary/candidate.rs index 7500df9..e35bb1d 100644 --- a/src/dictionary/candidate.rs +++ b/src/dictionary/candidate.rs @@ -1,28 +1,26 @@ -use crate::dictionary::dictentry::DictEntry; -use crate::error::CskkError; -use log::*; -use std::fmt::Write; -use std::sync::Arc; +use crate::dictionary::dictionary_candidate::DictionaryCandidate; +use crate::dictionary::CompositeKey; -// Blind copy of libskk vala Candidate class +// CandidateListに持たせる情報。 +// libskk vala Candidate classを元に、単体で送り仮名の厳密マッチの登録に必要な情報を持たせている。TODO: libskk 由来なので重複した情報を整理、valaなので外に見せすぎ、特にcomposite_keyに含まれる情報は不要かも #[derive(Debug, Clone)] pub struct Candidate { - pub(crate) midashi: Arc, - #[allow(dead_code)] + // 取り回しの都合上DictEntryと重複して持つ + pub(crate) midashi: String, pub(crate) okuri: bool, // Raw kouho_text that might include "#0回" etc - pub(crate) kouho_text: Arc, - pub(crate) annotation: Option>, - // Output to show the candidate. + pub(crate) kouho_text: String, + pub(crate) annotation: Option, + // Output to show the candidate. "第#0回"が"第壱回"のように後処理されている想定。 pub(crate) output: String, } impl Default for Candidate { fn default() -> Self { Candidate { - midashi: Arc::new("エラー".to_owned()), + midashi: "エラー".to_string(), okuri: false, - kouho_text: Arc::new("エラー".to_owned()), + kouho_text: "エラー".to_string(), annotation: None, output: "エラー".to_string(), } @@ -40,10 +38,10 @@ impl PartialEq for Candidate { impl Candidate { pub(crate) fn new( - midashi: Arc, + midashi: String, okuri: bool, - kouho_text: Arc, - annotation: Option>, + kouho_text: String, + annotation: Option, output: String, ) -> Self { Candidate { @@ -55,60 +53,19 @@ impl Candidate { } } - pub(crate) fn from_skk_jisyo_string(midashi: &str, raw_entry: &str) -> Result { - let mut chunk = raw_entry.split(';'); - if let Some(text) = chunk.next() { - let kouho = DictEntry::process_lisp_fun(text); - let annotation = chunk - .next() - .map(|entry| Arc::new(DictEntry::process_lisp_fun(entry))); - Ok(Candidate::new( - Arc::new(midashi.to_string()), - false, - Arc::new(kouho.to_string()), - annotation, - kouho, - )) - } else { - debug!("Failed to parse candidate from: {:?}", raw_entry); - Err(CskkError::Error("No candidate".to_string())) - } - } - - // entry string between '/' - // {候補};アノテーション - // {候補};*アノテーション - // TODO: 将来的には [{優先送り仮名}/{候補}] のような優先送り仮名エントリも扱えると嬉しい - pub(crate) fn to_skk_jisyo_string(&self) -> String { - let mut result = String::new(); - result.push_str(&DictEntry::escape_dictionary_string( - self.kouho_text.as_str(), - )); - if let Some(annotation) = &self.annotation { - write!( - result, - ";{}", - &DictEntry::escape_dictionary_string(annotation.as_str()) - ) - .expect("Failed to allocate jisyo string for candidate."); + /// + /// 辞書の候補からそのままの内容で候補リスト用のcandidateを返す。 + /// + pub(in crate::dictionary) fn from_dictionary_candidate( + composite_key: &CompositeKey, + dictionary_cand: &DictionaryCandidate, + ) -> Self { + Self { + midashi: composite_key.get_to_composite().to_string(), + okuri: composite_key.has_okuri(), + kouho_text: dictionary_cand.kouho_text.to_owned(), + annotation: dictionary_cand.annotation.to_owned(), + output: dictionary_cand.kouho_text.to_owned(), } - result - } -} - -#[cfg(test)] -mod test { - use super::*; - - #[test] - fn skk_jisyo_string_no_annotation() { - let candidate = Candidate::new( - Arc::new("みだし".to_string()), - false, - Arc::new("候補".to_string()), - None, - "候補".to_string(), - ); - assert_eq!("候補", candidate.to_skk_jisyo_string()) } } diff --git a/src/dictionary/composite_key.rs b/src/dictionary/composite_key.rs new file mode 100644 index 0000000..e047070 --- /dev/null +++ b/src/dictionary/composite_key.rs @@ -0,0 +1,54 @@ +use crate::KanaFormChanger; + +/// 辞書を引くための情報 +/// 厳密な送り仮名マッチのため、送り仮名を複数文字含みうる。 +#[derive(Clone, Debug)] +pub(crate) struct CompositeKey { + to_composite: String, + // When Some(), should never be empty string. + okuri: Option, +} + +impl CompositeKey { + pub(crate) fn new(to_composite: &str, okuri: Option) -> Self { + CompositeKey { + to_composite: to_composite.to_owned(), + okuri, + } + } + + pub(crate) fn get_to_composite(&self) -> &str { + &self.to_composite + } + + pub(crate) fn get_okuri(&self) -> &Option { + &self.okuri + } + + pub(crate) fn has_okuri(&self) -> bool { + self.okuri.is_some() + } + + /// Return the string that should be used in dictionary file's midashi. + pub(in crate::dictionary) fn get_dict_key(&self) -> String { + if self.okuri.is_some() { + // ローマ字ベースではない入力規則に対応するため、送り仮名の最初の文字はひらがなから対応表を引く。 + if let Some(okuri) = KanaFormChanger::kana_to_okuri_prefix( + &self.okuri.as_ref().unwrap().chars().next().unwrap(), + ) + //KanaFormChanger::kana_to_okuri_prefix(&self.okuri.unwrap()) + { + let mut result = self.get_to_composite().to_string(); + result.push(okuri); + return result; + } + } + + self.to_composite.to_owned() + } + + pub(crate) fn clear(&mut self) { + self.to_composite.clear(); + self.okuri = None; + } +} diff --git a/src/dictionary/dictentry.rs b/src/dictionary/dictentry.rs index 45fca15..b4323d2 100644 --- a/src/dictionary/dictentry.rs +++ b/src/dictionary/dictentry.rs @@ -1,82 +1,185 @@ -use crate::dictionary::candidate::Candidate; +use crate::dictionary::dictionary_parser::{entry, CandidatePrototype, DictEntryPrototype}; +use crate::dictionary::DictionaryCandidate; +use crate::dictionary::{Candidate, CompositeKey}; use crate::error::CskkError; use anyhow::bail; +use nom::Finish; use regex::{Captures, Regex}; -use std::fmt::Write; +use std::collections::BTreeMap; +use std::fmt::{Display, Formatter}; #[derive(Debug, Clone)] pub(crate) struct DictEntry { - pub(crate) midashi: String, - pub(crate) candidates: Vec, + pub(in crate::dictionary) midashi: String, + // 本来はエントリ自体が持つものではないが、 + // 過去に送りありエントリと無しエントリを混ぜて扱っていたため、互換性のために区別をここにも持っている。 + has_okuri: bool, + // 厳密な送り仮名がない場合や送りなしエントリは空文字列からのマップ + strict_okuri_candidate_map: BTreeMap>, } impl DictEntry { - pub(crate) fn remove_matching_candidate(&mut self, candidate: &Candidate) { - let index = self - .candidates - .iter() - .position(|it| *(it.kouho_text) == *candidate.kouho_text); - if let Some(index) = index { - self.candidates.remove(index); + /// Usually, DictEntry should be created from [from_skkjisyo_line] + /// + /// Create new DictEntry that has single candidate + /// This is for registration of new composition. + /// + pub(in crate::dictionary) fn new( + midashi: &str, + composite_key: &CompositeKey, + candidate: &Candidate, + ) -> Self { + let mut new_map = BTreeMap::new(); + if let Some(strict_okuri) = composite_key.get_okuri() { + new_map.insert( + strict_okuri.to_owned(), + vec![DictionaryCandidate::from_candidate(candidate)], + ); + } + // even for okuri-ari key, register a non strict okuri entry + new_map.insert( + "".to_string(), + vec![DictionaryCandidate::from_candidate(candidate)], + ); + + Self { + midashi: midashi.to_string(), + has_okuri: composite_key.has_okuri(), + strict_okuri_candidate_map: new_map, } } - pub(crate) fn insert_as_first_candidate(&mut self, candidate: Candidate) { - if *candidate.midashi == self.midashi { - self.candidates.insert(0, candidate); + /// candidateが含まれなかった場合はこのdictentryの先頭に追加する。 + /// candidateがこのdictentryに含まれる場合は与えられたcandidateを先頭にする。 + /// composite_keyが送り仮名を含む場合、厳密な送り仮名なしのエントリと有りのエントリの両方について先頭にする。 + pub(in crate::dictionary) fn prioritize_candidate( + &mut self, + composite_key: &CompositeKey, + candidate: &Candidate, + ) { + if let Some(okuri) = composite_key.get_okuri() { + self.prioritize_candidate_for_okuri(okuri, candidate); } + + self.prioritize_candidate_for_okuri("", candidate); } - pub(crate) fn get_candidates(&self) -> &Vec { - &self.candidates + /// strict_okuriの候補の中でcandidateを優先する。 + fn prioritize_candidate_for_okuri(&mut self, strict_okuri: &str, candidate: &Candidate) { + // 長さもたいしたことがないのでVecを手作業でRecent used 更新している。LRUCacheを用いるべきか検討の余地あり。 + let mut done = false; + if let Some(cands) = self.strict_okuri_candidate_map.get_mut(strict_okuri) { + let index = cands + .iter() + .position(|it| it.kouho_text == candidate.kouho_text); + if let Some(i) = index { + cands.swap(0, i); + // done by swap + done = true; + } + + if !done { + cands.insert(0, DictionaryCandidate::from_candidate(candidate)); + // done by insert on top + done = true; + } + } + + if !done { + // create new mapping for okuri + self.strict_okuri_candidate_map.insert( + strict_okuri.to_string(), + vec![DictionaryCandidate::from_candidate(candidate)], + ); + } } - pub(crate) fn from_skkjisyo_line(line: &str) -> Result { - let mut result = Vec::new(); - let mut line = line.trim().split_ascii_whitespace(); - let midashi = if let Some(midashi) = line.next() { - DictEntry::process_lisp_fun(midashi) - } else { - return Err(CskkError::Error("No midshi".to_string())); - }; - let entries = line.collect::>().join(" "); - if entries.is_empty() { - return Err(CskkError::Error("No entries".to_string())); + /// + /// composite_keyが送りなしの場合、エントリからcandidateに合うものを削除する。合うものがなかったら何もしない。 + /// + /// composite_keyが送りありの場合、厳密な送り仮名マッチのエントリと厳密な送り仮名のないエントリの両方からcandidateにあうものを削除する。合うものがなかったら何もしない。 + /// + pub(in crate::dictionary) fn remove_matching_candidate( + &mut self, + composite_key: &CompositeKey, + candidate: &Candidate, + ) { + if let Some(okuri) = composite_key.get_okuri() { + self.remove_candidate_for_okuri(okuri, candidate); } - let entries = entries.split('/'); - for entry in entries { - if !entry.is_empty() { - if let Ok(candidate) = Candidate::from_skk_jisyo_string(&midashi, entry) { - result.push(candidate) - } + + self.remove_candidate_for_okuri("", candidate); + } + + fn remove_candidate_for_okuri(&mut self, strict_okuri: &str, candidate: &Candidate) { + if let Some(cands) = self.strict_okuri_candidate_map.get_mut(strict_okuri) { + let index = cands + .iter() + .position(|it| *(it.kouho_text) == *candidate.kouho_text); + if let Some(index) = index { + cands.remove(index); } } - Ok(Self { - midashi, - candidates: result, - }) } - // one line of dictionary. - // e.g. - // こうほ /候補/好捕/ - pub fn to_skk_jisyo_string(&self) -> String { - if self.candidates.is_empty() { - return "".to_string(); + /// strict_okuriのマッチするエントリを返す。 + /// + pub(in crate::dictionary) fn get_candidates( + &self, + strict_okuri: &Option, + ) -> Option<&Vec> { + return if let Some(okuri) = strict_okuri { + self.strict_okuri_candidate_map.get(okuri) + } else { + self.strict_okuri_candidate_map.get("") + }; + } + + /// + /// 過去に送り有無エントリを混ぜていた実装のため、ファイル読み込み側ではデフォルトでは送りありエントリと推定し、 + /// 行処理では見出し語先頭がアルファベットではない(abbrevエントリではないと推定) かつ 末尾にアルファベットが付かないものを送りなしエントリとして扱っている。 + /// + pub(crate) fn from_skkjisyo_line(line: &str) -> Result { + lazy_static! {} + let parsed = entry(line).finish(); + if let Ok((_, dict_entry_prototype)) = parsed { + Ok(DictEntry::from_dict_entry_prototype(dict_entry_prototype)) + } else { + Err(CskkError::ParseError(format!("falied to parse {}", line))) + } + } + + fn from_dict_entry_prototype(dict_entry_prototype: DictEntryPrototype) -> Self { + let midashi = DictEntry::process_lisp_fun(dict_entry_prototype.midashi); + let alphabet = [ + 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', + 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', + ]; + let has_okuri = !midashi.starts_with(alphabet) && midashi.ends_with(alphabet); + + let strict_okuri_candidate_map = + DictEntry::candidates_from_prototype(dict_entry_prototype.candidates); + + Self { + midashi, + has_okuri, + strict_okuri_candidate_map, } + } - let mut result = String::new(); - write!( - result, - "{} ", - DictEntry::escape_dictionary_string(&self.midashi) - ) - .expect("Failed to allocate jisyo string for dict midashi"); - for candidate in &self.candidates { - write!(result, "/{}", &candidate.to_skk_jisyo_string()) - .expect("Failed to allocate jisyo string for dict entry"); + fn candidates_from_prototype( + candidates_prototype: BTreeMap<&str, Vec>, + ) -> BTreeMap> { + let mut result = BTreeMap::new(); + for (key, val) in candidates_prototype { + result.insert( + key.to_string(), + val.iter() + .map(DictionaryCandidate::from_candidate_prototype) + .collect(), + ); } - result.push('/'); + result } @@ -168,39 +271,62 @@ impl DictEntry { entry.to_owned() } + + /// true if this is likely okuri ari entry + pub(crate) fn is_okuri_ari_entry(&self) -> bool { + self.has_okuri + } +} + +impl Display for DictEntry { + /// + /// skk辞書内の一行 + /// + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "{} ", DictEntry::escape_dictionary_string(&self.midashi))?; + for (strict_okuri, cands) in &self.strict_okuri_candidate_map { + if !strict_okuri.is_empty() { + write!(f, "/[{}", strict_okuri)?; + } + for cand in cands { + write!(f, "/")?; + write!(f, "{}", cand)?; + } + if !strict_okuri.is_empty() { + write!(f, "/]")?; + } + } + write!(f, "/") + } } #[cfg(test)] mod test { use super::*; use crate::testhelper::init_test_logger; - use std::sync::Arc; #[test] fn split_candidate_okuri_nashi() { let result = DictEntry::from_skkjisyo_line( "あい /愛/相/藍/間/合/亜衣;人名/哀;悲哀/埃;(ほこり)塵埃/挨;挨拶/曖;曖昧/瞹;「曖」の異体字/靉/噫;ああ/欸/隘;狭隘/娃/藹;和気藹々/阨;≒隘/穢;(慣用音)/姶;姶良町/会;?/饗;?/" - ); - let result = result.unwrap(); + ).unwrap(); assert_eq!("あい", result.midashi); - let Candidate { + let DictionaryCandidate { kouho_text, annotation, .. - } = &result.candidates[0]; - assert_eq!("愛", *kouho_text.as_ref()); + } = &result.strict_okuri_candidate_map.get("").unwrap()[0]; + assert_eq!("愛", *kouho_text); assert_eq!(None, *annotation); - let Candidate { + let DictionaryCandidate { kouho_text, annotation, .. - } = &result.candidates[5]; - assert_eq!("亜衣", *kouho_text.as_ref()); + } = &result.strict_okuri_candidate_map.get("").unwrap()[5]; + assert_eq!("亜衣", *kouho_text); assert_eq!( "人名", - *(annotation.as_ref()) - .expect("亜衣 doesn't have annotation") - .as_ref() + annotation.as_ref().expect("亜衣 doesn't have annotation") ); } @@ -209,29 +335,29 @@ mod test { let result = DictEntry::from_skkjisyo_line("おどr /踊;dance/躍;jump/踴;「踊」の異体字/"); let result = result.unwrap(); assert_eq!("おどr", result.midashi); - let Candidate { + let DictionaryCandidate { kouho_text, annotation, .. - } = &result.candidates[0]; - assert_eq!("踊", *kouho_text.as_ref()); + } = &result.strict_okuri_candidate_map.get("").unwrap()[0]; + assert_eq!("踊", kouho_text); assert_eq!( "dance", - *(annotation.as_ref()) - .expect("踊 in sense of dance doesn't have annotation") + annotation .as_ref() + .expect("踊 in sense of dance doesn't have annotation") ); - let Candidate { + let DictionaryCandidate { kouho_text, annotation, .. - } = &result.candidates[1]; - assert_eq!("躍".to_string(), *kouho_text.as_ref()); + } = &result.strict_okuri_candidate_map.get("").unwrap()[1]; + assert_eq!("躍", kouho_text); assert_eq!( - "jump".to_string(), - *(annotation.as_ref()) - .expect("躍 in sense of jump doesn't have annotation.") + "jump", + annotation .as_ref() + .expect("躍 in sense of jump doesn't have annotation.") ); } @@ -240,45 +366,50 @@ mod test { init_test_logger(); let jisyo = "おくr /送;(send)/贈;(present) 賞を贈る/遅/後;気後れ/遲;「遅」の旧字/"; let result = DictEntry::from_skkjisyo_line(jisyo).unwrap(); - assert_eq!("送", *result.candidates[0].kouho_text); - assert_eq!("遅", *result.candidates[2].kouho_text); + assert_eq!( + "送", + &result.strict_okuri_candidate_map.get("").unwrap()[0].kouho_text + ); + assert_eq!( + "遅", + &result.strict_okuri_candidate_map.get("").unwrap()[2].kouho_text + ); } #[test] fn to_string() { let jisyo = "あい /愛/相/藍/間/合/亜衣;人名/哀;悲哀/埃;(ほこり)塵埃/挨;挨拶/曖;曖昧/瞹;「曖」の異体字/靉/噫;ああ/欸/隘;狭隘/娃/藹;和気藹々/阨;≒隘/穢;(慣用音)/姶;姶良町/会;?/饗;?/"; let dict_entry = DictEntry::from_skkjisyo_line(jisyo).unwrap(); - assert_eq!(jisyo, &dict_entry.to_skk_jisyo_string()); + assert_eq!(jisyo, &dict_entry.to_string()); } #[test] - fn remove() { - let jisyo = "あい /愛/相/藍/間/合/亜衣;人名/哀;悲哀/埃;(ほこり)塵埃/挨;挨拶/曖;曖昧/瞹;「曖」の異体字/靉/噫;ああ/欸/隘;狭隘/娃/藹;和気藹々/阨;≒隘/穢;(慣用音)/姶;姶良町/会;?/饗;?/"; - let mut dict_entry = DictEntry::from_skkjisyo_line(jisyo).unwrap(); - let candidate = Candidate::from_skk_jisyo_string("あい", "愛").unwrap(); - dict_entry.remove_matching_candidate(&candidate); - let Candidate { - kouho_text, - annotation, - .. - } = &dict_entry.candidates[0]; - assert_eq!("相", *kouho_text.as_ref()); - assert_eq!(None, *annotation); + fn to_string_with_strict_okuri() { + let jisyo = "あいs /愛/相/藍/間/合/亜衣;人名/哀;悲哀/埃;(ほこり)塵埃/挨;挨拶/曖;曖昧/瞹;「曖」の異体字/靉/噫;ああ/欸/隘;狭隘/娃/藹;和気藹々/阨;≒隘/穢;(慣用音)/姶;姶良町/会;?/饗;?/[さ/ダミー1/ダミー2/]/[せ/ダミー/]/"; + let dict_entry = DictEntry::from_skkjisyo_line(jisyo).unwrap(); + assert_eq!(jisyo, &dict_entry.to_string()); } #[test] - fn insert() { + fn remove() { let jisyo = "あい /愛/相/藍/間/合/亜衣;人名/哀;悲哀/埃;(ほこり)塵埃/挨;挨拶/曖;曖昧/瞹;「曖」の異体字/靉/噫;ああ/欸/隘;狭隘/娃/藹;和気藹々/阨;≒隘/穢;(慣用音)/姶;姶良町/会;?/饗;?/"; let mut dict_entry = DictEntry::from_skkjisyo_line(jisyo).unwrap(); - let candidate = Candidate::from_skk_jisyo_string("あい", "アイ;foo").unwrap(); - dict_entry.insert_as_first_candidate(candidate); - let Candidate { + let candidate = Candidate::new( + "あい".to_string(), + false, + "愛".to_string(), + None, + "愛".to_string(), + ); + let composite_key = CompositeKey::new("あい", None); + dict_entry.remove_matching_candidate(&composite_key, &candidate); + let DictionaryCandidate { kouho_text, annotation, .. - } = &dict_entry.candidates[0]; - assert_eq!("アイ", *kouho_text.as_ref()); - assert_eq!(Some(Arc::new("foo".to_string())), *annotation); + } = &dict_entry.strict_okuri_candidate_map.get("").unwrap()[0]; + assert_eq!("相", kouho_text); + assert_eq!(None, *annotation); } #[test] @@ -306,4 +437,10 @@ mod test { let result = DictEntry::escape_dictionary_string("(;;/)"); assert_eq!(r#"(concat "(\073\073\057)")"#, result); } + + #[test] + fn is_okuri_ari() { + let entry = DictEntry::from_skkjisyo_line("おくr /送;(send)/").unwrap(); + assert!(entry.is_okuri_ari_entry()); + } } diff --git a/src/dictionary/dictionary_candidate.rs b/src/dictionary/dictionary_candidate.rs new file mode 100644 index 0000000..891483f --- /dev/null +++ b/src/dictionary/dictionary_candidate.rs @@ -0,0 +1,59 @@ +use crate::dictionary::dictentry::DictEntry; +use crate::dictionary::dictionary_parser::CandidatePrototype; +use crate::Candidate; +use std::fmt::{Display, Formatter}; + +// Candidateの辞書内のデータ。 +#[derive(Debug, Clone, PartialEq, Eq, Ord, PartialOrd)] +pub(in crate::dictionary) struct DictionaryCandidate { + // Raw kouho_text that might include "#0回" etc + pub(in crate::dictionary) kouho_text: String, + pub(in crate::dictionary) annotation: Option, +} + +impl DictionaryCandidate { + pub(in crate::dictionary) fn from_candidate_prototype( + candidate_prototype: &CandidatePrototype, + ) -> Self { + let kouho_text = DictEntry::process_lisp_fun(candidate_prototype.kouho); + let annotation = candidate_prototype + .annotation + .map(DictEntry::process_lisp_fun); + + Self { + kouho_text, + annotation, + } + } + + // 送り仮名の厳密でないマッチから送り仮名の厳密マッチで新たに登録する際など。 + /// 候補リスト内のcandidateから新たにdictionary内部表現のcandidateを作る。 + pub(in crate::dictionary) fn from_candidate(candidate: &Candidate) -> Self { + let kouho_text = candidate.kouho_text.to_owned(); + let annotation = candidate.annotation.to_owned(); + + Self { + kouho_text, + annotation, + } + } +} + +impl Display for DictionaryCandidate { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + if self.annotation.is_some() { + write!( + f, + "{};{}", + DictEntry::escape_dictionary_string(&self.kouho_text), + DictEntry::escape_dictionary_string(self.annotation.as_ref().unwrap()) + ) + } else { + write!( + f, + "{}", + DictEntry::escape_dictionary_string(&self.kouho_text) + ) + } + } +} diff --git a/src/dictionary/dictionary_parser.rs b/src/dictionary/dictionary_parser.rs new file mode 100644 index 0000000..23e961d --- /dev/null +++ b/src/dictionary/dictionary_parser.rs @@ -0,0 +1,238 @@ +use nom::bytes::complete::{tag, take, take_till1, take_until, take_while1}; +use nom::character::complete::char; +use nom::combinator::{opt, verify}; +use nom::multi::many1; +use nom::sequence::delimited; +use nom::IResult; +use std::collections::BTreeMap; + +#[derive(PartialEq, Debug, Clone)] +pub(in crate::dictionary) struct CandidatePrototype<'a> { + pub(in crate::dictionary) kouho: &'a str, + pub(in crate::dictionary) annotation: Option<&'a str>, +} + +#[derive(PartialEq, Debug, Clone)] +pub(in crate::dictionary) struct DictEntryPrototype<'a> { + pub(in crate::dictionary) midashi: &'a str, + pub(in crate::dictionary) candidates: BTreeMap<&'a str, Vec>>, +} + +/// 辞書のエントリを読む +pub(in crate::dictionary) fn entry(input: &str) -> IResult<&str, DictEntryPrototype> { + let (i, midashi) = midashi(input)?; + let (i, _) = take_while1(|c| c == ' ')(i)?; + let (_, candidates) = candidates(i)?; + + Ok(( + "", + DictEntryPrototype { + midashi, + candidates, + }, + )) +} + +fn midashi(input: &str) -> IResult<&str, &str> { + let (i, midashi) = take_till1(|c: char| c == ' ')(input)?; + Ok((i, midashi)) +} + +/// '/'を含む'/'で囲われた候補リスト全体からcandidate全部 +fn candidates(input: &str) -> IResult<&str, BTreeMap<&str, Vec>> { + let (i, parsed_cands) = many1(candidate)(input)?; + // Make sure ends with '/' + let _ = char('/')(i)?; + + let mut result = BTreeMap::<&str, Vec>::new(); + for mut cand_map in parsed_cands { + for (okuri, value) in cand_map.iter_mut() { + if let Some(candidates) = result.get_mut(*okuri) { + candidates.append(value); + } else { + let mut new_candidates = vec![]; + new_candidates.append(value); + result.insert(okuri, new_candidates); + } + } + } + Ok(("", result)) +} + +/// '/'を含む'/'で囲われた部分から最初の'/'で囲われた部分を解釈し、Vecの厳密な送り仮名からのマップを返す。 +/// 通常のcandidateだと空文字列からのマップで1要素のもの、厳密送りだと再帰的に含まれるので複数要素。 +fn candidate(input: &str) -> IResult<&str, BTreeMap<&str, Vec>> { + let result; + let mut rest; + let (i, is_strict_okuri) = opt(delimited(tag("/["), take_until("]"), char(']')))(input)?; + rest = i; + if let Some(delimited_str) = is_strict_okuri { + // let taken = is_strict_okuri.unwrap(); + let (_, cand) = strict_okuri_candidates(delimited_str)?; + result = cand; + } else { + let (i, cand) = non_strict_okuri_candidate(i)?; + let mut map = BTreeMap::new(); + map.insert("", vec![cand]); + result = map; + rest = i; + } + + Ok((rest, result)) +} + +/// []を含まない厳密な送り仮名候補列の[]の間の'かな文字列/候補/候補/'を受けてその文字列からの候補マップを返す +fn strict_okuri_candidates(input: &str) -> IResult<&str, BTreeMap<&str, Vec>> { + // from U+3041 to U+3096 + let (i, okuri_kana) = take_while1(|c: char| ('ぁ'..'ゖ').contains(&c))(input)?; + let (i, cands) = many1(non_strict_okuri_candidate)(i)?; + let (i, _) = verify(take(1usize), |c: &str| c == "/")(i)?; + let mut result = BTreeMap::new(); + result.insert(okuri_kana, cands); + Ok((i, result)) +} + +// fn non_okuri_candidates(input: &str) -> IResult<&str, BTreeMap> {} + +/// '/'を含む候補部分から次の'/'直前までの候補を解釈する、ただし厳密な送り仮名の候補は解釈できない。 +fn non_strict_okuri_candidate(input: &str) -> IResult<&str, CandidatePrototype> { + //let (i, _) = verify(take(1usize), |c: &str| c == "/")(input)?; + let (i, _) = char('/')(input)?; + let (i, cand) = verify(take_till1(|c: char| c == '/'), |s: &str| { + !contains_non_srict_okuri_candidate_illegal_char(s) + })(i)?; + let (rest, taken) = take_till1(|c: char| c == ';')(cand)?; + if rest.is_empty() { + Ok(( + i, + CandidatePrototype { + kouho: taken, + annotation: None, + }, + )) + } else { + let (rest, _) = char(';')(rest)?; + Ok(( + i, + CandidatePrototype { + kouho: taken, + annotation: Some(rest), + }, + )) + } +} + +// true when contains chars not good for non strict okuri candidate: '[', ']', '/' +fn contains_non_srict_okuri_candidate_illegal_char(s: &str) -> bool { + s.contains('[') || s.contains(']') || s.contains('/') +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn basic_midashi() { + let (_i, result) = midashi("ほげr ////").unwrap(); + assert_eq!(result, "ほげr"); + } + + #[test] + fn basic_candidates() { + let (rest, result) = candidates("/愛;love/相/[す/愛/]/").unwrap(); + let mut expected = BTreeMap::new(); + expected.insert( + "", + vec![ + CandidatePrototype { + kouho: "愛", + annotation: Some("love"), + }, + CandidatePrototype { + kouho: "相", + annotation: None, + }, + ], + ); + expected.insert( + "す", + vec![CandidatePrototype { + kouho: "愛", + annotation: None, + }], + ); + + assert_eq!(rest, ""); + assert_eq!(result, expected) + } + + #[test] + fn basic_candidate() { + let (rest, result) = candidate("/愛/相/").unwrap(); + assert_eq!(rest, "/相/"); + let mut expected = BTreeMap::new(); + expected.insert( + "", + vec![CandidatePrototype { + kouho: "愛", + annotation: None, + }], + ); + assert_eq!(result, expected); + } + + #[test] + fn strict_okuri_candidate_in_candidates() { + let (rest, result) = candidate("/[つ/打;hit/討/]/打/").unwrap(); + assert_eq!(rest, "/打/"); + let mut expected = BTreeMap::new(); + expected.insert( + "つ", + vec![ + CandidatePrototype { + kouho: "打", + annotation: Some("hit"), + }, + CandidatePrototype { + kouho: "討", + annotation: None, + }, + ], + ); + assert_eq!(result, expected); + } + + #[test] + fn basic_strict_okuri_candidate() { + let mut expected = BTreeMap::new(); + expected.insert( + "って", + vec![ + CandidatePrototype { + kouho: "送", + annotation: None, + }, + CandidatePrototype { + kouho: "贈", + annotation: None, + }, + ], + ); + let (i, result) = strict_okuri_candidates("って/送/贈/").unwrap(); + assert_eq!(i, ""); + assert_eq!(result, expected); + } + + #[test] + fn non_strict_okuri_candidate_test() { + let (rest, result) = non_strict_okuri_candidate("/送/贈/").unwrap(); + assert_eq!( + result, + CandidatePrototype { + kouho: "送", + annotation: None + } + ); + assert_eq!(rest, "/贈/"); + } +} diff --git a/src/dictionary/empty_dict.rs b/src/dictionary/empty_dict.rs index 142f0f4..e6a12dd 100644 --- a/src/dictionary/empty_dict.rs +++ b/src/dictionary/empty_dict.rs @@ -1,5 +1,5 @@ use crate::dictionary::dictentry::DictEntry; -use crate::dictionary::Dictionary; +use crate::dictionary::{CompositeKey, Dictionary}; /// /// Empty dictionary @@ -8,7 +8,7 @@ use crate::dictionary::Dictionary; pub(crate) struct EmptyDictionary {} impl Dictionary for EmptyDictionary { - fn lookup(&self, _midashi: &str, _okuri: bool) -> Option<&DictEntry> { + fn lookup(&self, _composite_key: &CompositeKey) -> Option<&DictEntry> { None } } diff --git a/src/dictionary/file_dictionary.rs b/src/dictionary/file_dictionary.rs index 2285e4e..615a823 100644 --- a/src/dictionary/file_dictionary.rs +++ b/src/dictionary/file_dictionary.rs @@ -4,16 +4,20 @@ use crate::error::CskkError; use encoding_rs::Encoding; use encoding_rs_io::DecodeReaderBytesBuilder; use log::warn; -use std::collections::BTreeMap; use std::fs::File; use std::io::{BufRead, BufReader}; -pub(crate) trait FileDictionary: Dictionary { +pub(in crate::dictionary) struct DictionaryEntries { + pub(in crate::dictionary) okuri_ari: Vec<(String, DictEntry)>, + pub(in crate::dictionary) okuri_nashi: Vec<(String, DictEntry)>, +} + +pub(in crate::dictionary) trait FileDictionary: Dictionary { fn file_path(&self) -> &str; fn encode(&self) -> &str; - fn set_dictionary(&mut self, dictionary: BTreeMap); + fn set_dictionary(&mut self, dictionary: DictionaryEntries); fn reload(&mut self) -> Result<(), CskkError> { let dictionary = load_dictionary(self.file_path(), self.encode().as_bytes())?; @@ -22,28 +26,52 @@ pub(crate) trait FileDictionary: Dictionary { } } -pub(crate) fn load_dictionary( +enum DictionaryLoadMode { + OkuriAri, + OkuriNashi, +} + +/// 順序付きで辞書を読む +pub(in crate::dictionary) fn load_dictionary( file_path: &str, encode: &[u8], -) -> Result, CskkError> { +) -> Result { let dict_file = File::open(file_path)?; let enc = Encoding::for_label_no_replacement(encode); let decoder = DecodeReaderBytesBuilder::new() .encoding(enc) .build(dict_file); let reader = BufReader::new(decoder); - let mut dictionary = BTreeMap::new(); + let mut okuri_ari_dictionary = Vec::new(); + let mut okuri_nashi_dictionary = Vec::new(); + + // 後の送り仮名再確認の時にabbrevエントリを読み間違えないため、デフォルトはOkuriAri + let mut mode = DictionaryLoadMode::OkuriAri; for line in reader.lines() { match line { Ok(line) => { if line.starts_with(';') { - // Skip + if line.contains(";; okuri-ari entries") { + mode = DictionaryLoadMode::OkuriAri; + } else if line.contains(";; okuri-nasi entries") { + mode = DictionaryLoadMode::OkuriNashi + } } else { let parsed = DictEntry::from_skkjisyo_line(&line); match parsed { - Ok(parsed) => { - dictionary.insert(parsed.midashi.clone(), parsed); - } + Ok(parsed) => match mode { + DictionaryLoadMode::OkuriAri => { + // 過去の辞書でokuri-ari,nasiを無視して保存していた互換性のため、行をparseした内容で確認しなおす。 + if parsed.is_okuri_ari_entry() { + okuri_ari_dictionary.push((parsed.midashi.clone(), parsed)); + } else { + okuri_nashi_dictionary.push((parsed.midashi.clone(), parsed)); + } + } + DictionaryLoadMode::OkuriNashi => { + okuri_nashi_dictionary.push((parsed.midashi.clone(), parsed)); + } + }, Err(_) => { warn!("Dict is ill formatted. Ignored line {}", &line); } @@ -55,5 +83,8 @@ pub(crate) fn load_dictionary( } } } - Ok(dictionary) + Ok(DictionaryEntries { + okuri_nashi: okuri_nashi_dictionary, + okuri_ari: okuri_ari_dictionary, + }) } diff --git a/src/dictionary/mod.rs b/src/dictionary/mod.rs index 42f6682..4db0d02 100644 --- a/src/dictionary/mod.rs +++ b/src/dictionary/mod.rs @@ -1,25 +1,28 @@ -use static_dict::StaticFileDict; - -use crate::dictionary::candidate::Candidate; -use crate::error::CskkError; -use dictentry::DictEntry; -use empty_dict::EmptyDictionary; -use std::sync::{Arc, Mutex}; -use user_dictionary::UserDictionary; - pub(crate) mod candidate; +pub(crate) mod composite_key; pub(crate) mod dictentry; +mod dictionary_candidate; +mod dictionary_parser; pub mod empty_dict; pub(crate) mod file_dictionary; pub mod static_dict; pub mod user_dictionary; +use crate::error::CskkError; use crate::form_changer::numeric_form_changer::{ numeric_to_daiji_as_number, numeric_to_kanji_each, numeric_to_simple_kanji_as_number, numeric_to_thousand_separator, numeric_to_zenkaku, }; +pub(crate) use candidate::Candidate; +pub(crate) use composite_key::CompositeKey; +use dictentry::DictEntry; +pub(in crate::dictionary) use dictionary_candidate::DictionaryCandidate; +use empty_dict::EmptyDictionary; use log::*; use regex::Regex; +use static_dict::StaticFileDict; +use std::sync::{Arc, Mutex}; +use user_dictionary::UserDictionary; // C側に出す関係でSizedである必要があり、dyn Traitではなくenumでラップする。 #[derive(Debug)] @@ -76,15 +79,22 @@ impl CskkDictionary { /// Returns true if updated the dictionary. pub(crate) fn confirm_candidate( dictionary: &mut Arc, + composite_key: &CompositeKey, candidate: &Candidate, ) -> Result { debug!("confirm: {:?}", candidate); // Using mutex in match on purpose, never acquiring lock again. #[allow(clippy::significant_drop_in_scrutinee)] match *dictionary.mutex.lock().unwrap() { - CskkDictionaryType::StaticFile(ref mut dict) => dict.select_candidate(candidate), - CskkDictionaryType::UserFile(ref mut dict) => dict.select_candidate(candidate), - CskkDictionaryType::EmptyDict(ref mut dict) => dict.select_candidate(candidate), + CskkDictionaryType::StaticFile(ref mut dict) => { + dict.select_candidate(composite_key, candidate) + } + CskkDictionaryType::UserFile(ref mut dict) => { + dict.select_candidate(composite_key, candidate) + } + CskkDictionaryType::EmptyDict(ref mut dict) => { + dict.select_candidate(composite_key, candidate) + } } } @@ -93,24 +103,30 @@ pub(crate) fn confirm_candidate( /// Returns true if updated the dictionary. pub(crate) fn purge_candidate( dictionary: &mut Arc, + composite_key: &CompositeKey, candidate: &Candidate, ) -> Result { // Using mutex in match on purpose, never acquiring lock again. #[allow(clippy::significant_drop_in_scrutinee)] match *dictionary.mutex.lock().unwrap() { - CskkDictionaryType::StaticFile(ref mut dict) => dict.purge_candidate(candidate), - CskkDictionaryType::UserFile(ref mut dict) => dict.purge_candidate(candidate), - CskkDictionaryType::EmptyDict(ref mut dict) => dict.purge_candidate(candidate), + CskkDictionaryType::StaticFile(ref mut dict) => { + dict.purge_candidate(composite_key, candidate) + } + CskkDictionaryType::UserFile(ref mut dict) => { + dict.purge_candidate(composite_key, candidate) + } + CskkDictionaryType::EmptyDict(ref mut dict) => { + dict.purge_candidate(composite_key, candidate) + } } } -/// 現在ueno/libskk同様にDedupはkouho_textのみ、候補の順序はdictの順番通り。 -/// annotationについては特に決めていないが、現在のところsortの仕様により先の候補が優先される。 +/// 現在ueno/libskk同様にDedupはkouho_textのみ。 pub(crate) fn get_all_candidates( dictionaries: &[Arc], - raw_to_composite: &str, + composite_key: &CompositeKey, ) -> Vec { - get_all_candidates_inner(dictionaries, raw_to_composite, false) + get_all_candidates_inner(dictionaries, composite_key, false) } lazy_static! { @@ -123,39 +139,35 @@ lazy_static! { /// fn get_all_candidates_inner( dictionaries: &[Arc], - raw_to_composite: &str, + composite_key: &CompositeKey, is_numeric_re_lookup: bool, ) -> Vec { let mut deduped_candidates = vec![]; let mut ordered_candidates = vec![]; - let mut dict_key = raw_to_composite.to_string(); + let mut composite_key = composite_key.to_owned(); let mut matched_numbers = vec![]; if !is_numeric_re_lookup { // FIXME: destructuring-bind is unstable yet in current Rust. Fix in future Rust. - let pair = to_composite_to_numeric_dict_key(raw_to_composite); - dict_key = pair.0; + let pair = to_composite_to_numeric_dict_key(&composite_key); + composite_key = pair.0; matched_numbers = pair.1; } - for cskkdict in dictionaries.iter() { - let lock = cskkdict.mutex.lock().unwrap(); - if let Some(dict_entry) = match &*lock { - CskkDictionaryType::StaticFile(dict) => dict.lookup(&dict_key, false), - CskkDictionaryType::UserFile(dict) => dict.lookup(&dict_key, false), - CskkDictionaryType::EmptyDict(dict) => dict.lookup(&dict_key, false), - } { - ordered_candidates.extend(dict_entry.get_candidates().to_owned()); - deduped_candidates.extend(dict_entry.get_candidates().to_owned()); - } - } + let candidates = get_candidates_in_order(dictionaries, &composite_key); + ordered_candidates.extend(candidates.to_owned()); + deduped_candidates.extend(candidates); if deduped_candidates.is_empty() { return vec![]; } - deduped_candidates.sort_by(|a, b| a.kouho_text.cmp(&b.kouho_text)); + deduped_candidates.sort_unstable(); + // Make Option == some come before None + deduped_candidates.reverse(); deduped_candidates.dedup_by(|a, b| a.kouho_text == b.kouho_text); + // reverse back for faster iteration? maybe unneeded. + deduped_candidates.reverse(); let mut result = vec![]; for candidate in ordered_candidates { @@ -163,10 +175,13 @@ fn get_all_candidates_inner( for (pos, deduped) in deduped_candidates.iter().enumerate() { if (*deduped).eq(&candidate) { if is_numeric_re_lookup { - result.push((*deduped).clone()); + result.push(Candidate::from_dictionary_candidate( + &composite_key, + deduped, + )); } else { result.append(&mut replace_numeric_match( - deduped, + &Candidate::from_dictionary_candidate(&composite_key, deduped), &matched_numbers, dictionaries, )); @@ -182,19 +197,63 @@ fn get_all_candidates_inner( result } +/// dictionariesからcompositeKeyに合わせた順序でDictionaryCandidateを返す。 +/// +/// compositeKeyが送り無しの場合、単なる辞書順 +/// +/// compositeKeyが送り有りの場合、まず送り仮名の厳密マッチする候補を辞書順に、その後厳密マッチのない候補を辞書順に。 +/// +fn get_candidates_in_order( + dictionaries: &[Arc], + composite_key: &CompositeKey, +) -> Vec { + let mut result = Vec::new(); + + for cskkdict in dictionaries.iter() { + let lock = cskkdict.mutex.lock().unwrap(); + if let Some(dict_entry) = match &*lock { + CskkDictionaryType::StaticFile(dict) => dict.lookup(composite_key), + CskkDictionaryType::UserFile(dict) => dict.lookup(composite_key), + CskkDictionaryType::EmptyDict(dict) => dict.lookup(composite_key), + } { + let strict_okuri_cands = if composite_key.has_okuri() { + dict_entry.get_candidates(composite_key.get_okuri()) + } else { + None + }; + if let Some(candidates) = strict_okuri_cands { + result.extend(candidates.to_owned()); + } + + let non_strict_okuri_cands = dict_entry.get_candidates(&None); + if let Some(candidates) = non_strict_okuri_cands { + result.extend(candidates.to_owned()); + } + } + } + + result +} + /// /// 数字が含まれていた場合#に置きかえて数字と共にかえす。 +/// /// 12がつ6にち -> (#がつ#にち, [12,6]) /// -pub(crate) fn to_composite_to_numeric_dict_key(to_composite: &str) -> (String, Vec) { - let mut dict_key = to_composite.to_string(); +pub(crate) fn to_composite_to_numeric_dict_key( + to_composite: &CompositeKey, +) -> (CompositeKey, Vec) { + let mut dict_key = to_composite.get_to_composite().to_owned(); let mut matched_numbers = vec![]; - for numeric_match in NUM_REGEX.find_iter(to_composite) { + for numeric_match in NUM_REGEX.find_iter(to_composite.get_to_composite()) { let new_dict_key = dict_key.replacen(numeric_match.as_str(), "#", 1); dict_key = new_dict_key; matched_numbers.push(numeric_match.as_str().to_owned()); } - (dict_key, matched_numbers) + ( + CompositeKey::new(&dict_key, to_composite.get_okuri().to_owned()), + matched_numbers, + ) } /// Return how many numeric string is in string to composite @@ -295,8 +354,11 @@ pub(crate) fn replace_numeric_string( } "#4" => { let mut replaced_output_texts = vec![]; - let numeric_lookup_results = - get_all_candidates_inner(dictionaries, &numbers[n], true); + let numeric_lookup_results = get_all_candidates_inner( + dictionaries, + &CompositeKey::new(&numbers[n], None), + true, + ); for kouho_text in ¤t_output_texts { for numeric_lookup in &numeric_lookup_results { replaced_output_texts.push(kouho_text.replacen( @@ -348,16 +410,16 @@ pub(crate) fn replace_numeric_string( #[allow(dead_code)] pub(crate) fn get_nth_candidate( dictionaries: &[Arc], - to_composite: &str, + composite_key: &CompositeKey, selection_pointer: usize, ) -> Option { - let candidates = get_all_candidates(dictionaries, to_composite); + let candidates = get_all_candidates(dictionaries, composite_key); candidates.get(selection_pointer).cloned() } pub(crate) trait Dictionary { - /// 今のところ数値変換等がないので、raw_to_compositeではなくmidashiとして完全一致を探す。 - fn lookup(&self, midashi: &str, _okuri: bool) -> Option<&DictEntry>; + /// midashiと一致するエントリを返す。 + fn lookup(&self, composite_key: &CompositeKey) -> Option<&DictEntry>; fn is_read_only(&self) -> bool { true @@ -377,14 +439,27 @@ pub(crate) trait Dictionary { /// Select that candidate. /// Supporting dictionary will add and move that candidate to the first place so that next time it comes to candidate early. /// Safe to call to read_only dictionary. - fn select_candidate(&mut self, _candidate: &Candidate) -> Result { + fn select_candidate( + &mut self, + _composite_key: &CompositeKey, + _candidate: &Candidate, + ) -> Result { Ok(false) } /// Remove that candidate if dictionary supports editing. /// Safe to call to read_only dictionary - fn purge_candidate(&mut self, _candidate: &Candidate) -> Result { + fn purge_candidate( + &mut self, + _composite_key: &CompositeKey, + _candidate: &Candidate, + ) -> Result { Ok(false) } + + /// Reload dictionary. + fn reload(&mut self) -> Result<(), CskkError> { + Ok(()) + } } #[cfg(test)] diff --git a/src/dictionary/static_dict.rs b/src/dictionary/static_dict.rs index c22f6bb..e6fdaf2 100644 --- a/src/dictionary/static_dict.rs +++ b/src/dictionary/static_dict.rs @@ -1,15 +1,16 @@ +use crate::dictionary::file_dictionary::{load_dictionary, DictionaryEntries, FileDictionary}; +use crate::dictionary::{CompositeKey, DictEntry, Dictionary}; use crate::CskkError; use std::collections::BTreeMap; - -use crate::dictionary::file_dictionary::{load_dictionary, FileDictionary}; -use crate::dictionary::{DictEntry, Dictionary}; +use std::iter::FromIterator; #[derive(Debug)] pub(crate) struct StaticFileDict { file_path: String, encode: String, // Midashi -> DictEntry map - dictionary: BTreeMap, + okuri_ari_dictionary: BTreeMap, + okuri_nashi_dictionary: BTreeMap, } impl StaticFileDict { @@ -20,14 +21,24 @@ impl StaticFileDict { Ok(StaticFileDict { file_path: String::from(file_path), encode: encode.to_string(), - dictionary, + okuri_ari_dictionary: BTreeMap::from_iter(dictionary.okuri_ari), + okuri_nashi_dictionary: BTreeMap::from_iter(dictionary.okuri_nashi), }) } } impl Dictionary for StaticFileDict { - fn lookup(&self, midashi: &str, _okuri: bool) -> Option<&DictEntry> { - self.dictionary.get(midashi) + fn lookup(&self, composite_key: &CompositeKey) -> Option<&DictEntry> { + return if composite_key.has_okuri() { + self.okuri_ari_dictionary.get(&composite_key.get_dict_key()) + } else { + self.okuri_nashi_dictionary + .get(&composite_key.get_dict_key()) + }; + } + + fn reload(&mut self) -> Result<(), CskkError> { + FileDictionary::reload(self) } } @@ -40,7 +51,8 @@ impl FileDictionary for StaticFileDict { &self.encode } - fn set_dictionary(&mut self, dictionary: BTreeMap) { - self.dictionary = dictionary + fn set_dictionary(&mut self, dictionary: DictionaryEntries) { + self.okuri_ari_dictionary = BTreeMap::from_iter(dictionary.okuri_ari); + self.okuri_nashi_dictionary = BTreeMap::from_iter(dictionary.okuri_nashi); } } diff --git a/src/dictionary/user_dictionary.rs b/src/dictionary/user_dictionary.rs index 18a0748..e6cbd39 100644 --- a/src/dictionary/user_dictionary.rs +++ b/src/dictionary/user_dictionary.rs @@ -1,12 +1,11 @@ -use std::collections::BTreeMap; - use crate::dictionary::candidate::Candidate; -use crate::dictionary::file_dictionary::{load_dictionary, FileDictionary}; -use crate::dictionary::{DictEntry, Dictionary}; +use crate::dictionary::file_dictionary::{load_dictionary, DictionaryEntries, FileDictionary}; +use crate::dictionary::{CompositeKey, DictEntry, Dictionary}; use crate::error::CskkError; use crate::error::CskkError::Error; use encoding_rs::{Encoder, EncoderResult, Encoding}; use log::*; +use lru::LruCache; use std::fs::{rename, File}; use std::io::{BufWriter, Write}; @@ -19,7 +18,8 @@ pub(crate) struct UserDictionary { file_path: String, encode: String, // Midashi -> DictEntry map - dictionary: BTreeMap, + okuri_ari_dictionary: LruCache, + okuri_nashi_dictionary: LruCache, // Just bool, because we know this is under mutex. has_change: bool, } @@ -29,18 +29,35 @@ const BUF_SIZE: usize = 1024; impl UserDictionary { pub(crate) fn new(file_path: &str, encode: &str) -> Result { let dictionary = load_dictionary(file_path, encode.as_bytes())?; + let mut okuri_ari_dict = LruCache::unbounded(); + for (k, v) in dictionary.okuri_ari.into_iter().rev() { + okuri_ari_dict.put(k, v); + } + + let mut okuri_nashi_dict = LruCache::unbounded(); + for (k, v) in dictionary.okuri_nashi.into_iter().rev() { + okuri_nashi_dict.put(k, v); + } + Ok(UserDictionary { file_path: String::from(file_path), encode: encode.to_string(), - dictionary, + okuri_ari_dictionary: okuri_ari_dict, + okuri_nashi_dictionary: okuri_nashi_dict, has_change: false, }) } } impl Dictionary for UserDictionary { - fn lookup(&self, midashi: &str, _okuri: bool) -> Option<&DictEntry> { - self.dictionary.get(midashi) + fn lookup(&self, composite_key: &CompositeKey) -> Option<&DictEntry> { + return if composite_key.has_okuri() { + self.okuri_ari_dictionary + .peek(&composite_key.get_dict_key()) + } else { + self.okuri_nashi_dictionary + .peek(&composite_key.get_dict_key()) + }; } fn is_read_only(&self) -> bool { @@ -48,7 +65,8 @@ impl Dictionary for UserDictionary { } /// {file_path}.BAK に退避してからfile_pathに保存する - /// TODO: 現在は他の辞書と互換性がないただのエントリの羅列なので、okuri-ari entriesとokuri-nasi entriesに分けてddskkのようにファイル上で走査する辞書互換にする。 + /// 辞書ファイルのフォーマットは SKK 16.2 user manual 5.10.7 辞書の書式 に依る + /// userdictなので送りありエントリも送りなしエントリも最近使用した順に並ぶ。 fn save_dictionary(&mut self) -> Result { if self.has_change { rename(&self.file_path, &format!("{}.BAK", self.file_path))?; @@ -58,8 +76,30 @@ impl Dictionary for UserDictionary { let mut enc = Encoding::for_label(self.encode.as_bytes()) .expect("It should be same as encoding name succeeded when loading file.") .new_encoder(); - for dictentry in self.dictionary.values() { - let mut source = dictentry.to_skk_jisyo_string(); + + // Not using. Can't compile on mac. + // let encoded = encode_string( + // &mut enc, + // &format!( + // ";; Save on {} \n", + // chrono::offset::Utc::now().to_rfc3339_opts(chrono::SecondsFormat::Secs, false) + // ), + // )?; + // stream.write_all(encoded.as_slice())?; + + let encoded = encode_string(&mut enc, ";; okuri-ari entries.\n")?; + stream.write_all(encoded.as_slice())?; + for (_, dictentry) in self.okuri_ari_dictionary.iter() { + let mut source = dictentry.to_string(); + source += "\n"; + if let Ok(encoded) = encode_string(&mut enc, source.as_mut_str()) { + stream.write_all(encoded.as_slice())?; + } + } + let encoded = encode_string(&mut enc, ";; okuri-nasi entries.\n")?; + stream.write_all(encoded.as_slice())?; + for (_, dictentry) in self.okuri_nashi_dictionary.iter() { + let mut source = dictentry.to_string(); source += "\n"; if let Ok(encoded) = encode_string(&mut enc, source.as_mut_str()) { stream.write_all(encoded.as_slice())?; @@ -73,22 +113,28 @@ impl Dictionary for UserDictionary { } } - fn select_candidate(&mut self, candidate: &Candidate) -> Result { + fn select_candidate( + &mut self, + composite_key: &CompositeKey, + candidate: &Candidate, + ) -> Result { let midashi = &candidate.midashi; debug!("Select midashi: {:?}", midashi); - let entry = self.dictionary.get_mut(midashi.as_str()); + let dictionary = if candidate.okuri { + &mut self.okuri_ari_dictionary + } else { + &mut self.okuri_nashi_dictionary + }; + + let entry = dictionary.get_mut(midashi.as_str()); match entry { Some(dict_entry) => { - dict_entry.remove_matching_candidate(candidate); - dict_entry.insert_as_first_candidate(candidate.clone()); + dict_entry.prioritize_candidate(composite_key, candidate); } None => { - self.dictionary.insert( - (*candidate.midashi).clone(), - DictEntry { - midashi: (*candidate.midashi).clone(), - candidates: vec![(*candidate).clone()], - }, + dictionary.put( + candidate.midashi.to_owned(), + DictEntry::new(&candidate.midashi, composite_key, candidate), ); } } @@ -96,15 +142,28 @@ impl Dictionary for UserDictionary { Ok(true) } - fn purge_candidate(&mut self, candidate: &Candidate) -> Result { + fn purge_candidate( + &mut self, + composite_key: &CompositeKey, + candidate: &Candidate, + ) -> Result { + let dictionary = if candidate.okuri { + &mut self.okuri_ari_dictionary + } else { + &mut self.okuri_nashi_dictionary + }; let midashi = &candidate.midashi; - let entry = self.dictionary.get_mut(midashi.as_str()); + let entry = dictionary.get_mut(midashi.as_str()); if let Some(dict_entry) = entry { - dict_entry.remove_matching_candidate(candidate); + dict_entry.remove_matching_candidate(composite_key, candidate); } self.has_change = true; Ok(true) } + + fn reload(&mut self) -> Result<(), CskkError> { + FileDictionary::reload(self) + } } impl FileDictionary for UserDictionary { @@ -116,8 +175,18 @@ impl FileDictionary for UserDictionary { &self.encode } - fn set_dictionary(&mut self, dictionary: BTreeMap) { - self.dictionary = dictionary; + fn set_dictionary(&mut self, dictionary: DictionaryEntries) { + let mut okuri_ari_dict = LruCache::unbounded(); + for (k, v) in dictionary.okuri_ari.into_iter().rev() { + okuri_ari_dict.put(k, v); + } + self.okuri_ari_dictionary = okuri_ari_dict; + + let mut okuri_nashi_dict = LruCache::unbounded(); + for (k, v) in dictionary.okuri_nashi.into_iter().rev() { + okuri_nashi_dict.put(k, v); + } + self.okuri_nashi_dictionary = okuri_nashi_dict; } } @@ -153,16 +222,106 @@ fn encode_string(encoder: &mut Encoder, to_encode: &str) -> Result, Cskk #[cfg(test)] mod test { use super::*; + use encoding_rs_io::DecodeReaderBytesBuilder; + use std::io::{BufRead, BufReader}; #[test] fn userdict() -> Result<(), CskkError> { File::create("tests/data/dictionaries/empty.dat")?; let mut user_dictionary = UserDictionary::new("tests/data/dictionaries/empty.dat", "utf-8")?; - let candidate = Candidate::from_skk_jisyo_string("あああ", "アアア;wow").unwrap(); - user_dictionary.select_candidate(&candidate)?; + let candidate = Candidate::new( + "あああ".to_string(), + false, + "アアア".to_string(), + Some("wow".to_string()), + "アアア".to_string(), + ); + let composite_key = CompositeKey::new("あああ", None); + user_dictionary.select_candidate(&composite_key, &candidate)?; + user_dictionary.save_dictionary()?; + user_dictionary.purge_candidate(&composite_key, &candidate)?; + user_dictionary.save_dictionary()?; + Ok(()) + } + + /// Recent select_candidate の順序になっているか + #[test] + fn userdict_ordering() -> Result<(), CskkError> { + let filepath = "tests/data/dictionaries/empty.dat"; + File::create(filepath)?; + let mut user_dictionary = UserDictionary::new(filepath, "utf-8")?; + let candidate = Candidate::new( + "あ".to_string(), + false, + "候補".to_string(), + None, + "候補".to_string(), + ); + let composite_key = CompositeKey::new("あ", None); + user_dictionary.select_candidate(&composite_key, &candidate)?; + let candidate = Candidate::new( + "い".to_string(), + false, + "候補".to_string(), + None, + "候補".to_string(), + ); + let composite_key = CompositeKey::new("い", None); + user_dictionary.select_candidate(&composite_key, &candidate)?; + + let candidate = Candidate::new( + "あb".to_string(), + true, + "候補".to_string(), + None, + "候補".to_string(), + ); + let composite_key = CompositeKey::new("あ", Some("ば".to_string())); + user_dictionary.select_candidate(&composite_key, &candidate)?; + + let candidate = Candidate::new( + "いb".to_string(), + true, + "候補".to_string(), + None, + "候補".to_string(), + ); + let composite_key = CompositeKey::new("い", Some("ば".to_string())); + user_dictionary.select_candidate(&composite_key, &candidate)?; user_dictionary.save_dictionary()?; - user_dictionary.purge_candidate(&candidate)?; + + let saved_file = File::open("tests/data/dictionaries/empty.dat")?; + let enc = Encoding::for_label_no_replacement("utf-8".as_bytes()); + let decoder = DecodeReaderBytesBuilder::new() + .encoding(enc) + .build(saved_file); + let reader = BufReader::new(decoder); + for (i, line) in reader.lines().enumerate() { + if let Ok(line) = line { + match i { + 0 => { + assert!(line.contains(";; okuri-ari entries")) + } + 1 => { + assert!(line.contains("いb /候補/[ば/候補/]/")) + } + 2 => { + assert!(line.contains("あb /候補/[ば/候補/]/")) + } + 3 => { + assert!(line.contains(";; okuri-nasi entries")) + } + 4 => { + assert!(line.contains("い /候補/")) + } + 5 => { + assert!(line.contains("あ /候補/")) + } + _ => {} + } + } + } user_dictionary.save_dictionary()?; Ok(()) } diff --git a/src/form_changer/kana_form_changer.rs b/src/form_changer/kana_form_changer.rs index d98cf6d..70c0294 100644 --- a/src/form_changer/kana_form_changer.rs +++ b/src/form_changer/kana_form_changer.rs @@ -32,90 +32,90 @@ macro_rules! btreemap { }; } lazy_static! { - static ref KANA_ROM_MAP: BTreeMap<&'static str, &'static str> = btreemap![ - ["あ", "a"], - ["い", "i"], - ["う", "u"], - ["え", "e"], - ["お", "o"], - ["か", "k"], - ["き", "k"], - ["く", "k"], - ["け", "k"], - ["こ", "k"], - ["さ", "s"], - ["し", "s"], - ["す", "s"], - ["せ", "s"], - ["そ", "s"], - ["た", "t"], - ["ち", "t"], - ["つ", "t"], - ["て", "t"], - ["と", "t"], - ["な", "n"], - ["に", "n"], - ["ぬ", "n"], - ["ね", "n"], - ["の", "n"], - ["は", "h"], - ["ひ", "h"], - ["ふ", "h"], - ["へ", "h"], - ["ほ", "h"], - ["ま", "m"], - ["み", "m"], - ["む", "m"], - ["め", "m"], - ["も", "m"], - ["や", "y"], - ["ゆ", "y"], - ["よ", "y"], - ["ら", "r"], - ["り", "r"], - ["る", "r"], - ["れ", "r"], - ["ろ", "r"], - ["わ", "w"], - ["ゐ", "x"], - ["ゑ", "x"], - ["を", "w"], - ["ん", "n"], - ["が", "g"], - ["ぎ", "g"], - ["ぐ", "g"], - ["げ", "g"], - ["ご", "g"], - ["ざ", "z"], - ["じ", "z"], // ddskkでは"じ"が送り仮名の場合"j"として処理するのがデフォルト値だが、SKK-JISYO.S等ではzの送り仮名を用いることが多いのでこちらを用いる。 - ["ず", "z"], - ["ぜ", "z"], - ["ぞ", "z"], - ["だ", "d"], - ["ぢ", "d"], - ["づ", "d"], - ["で", "d"], - ["ど", "d"], - ["ば", "b"], - ["び", "b"], - ["ぶ", "b"], - ["べ", "b"], - ["ぼ", "b"], - ["ぱ", "p"], - ["ぴ", "p"], - ["ぷ", "p"], - ["ぺ", "p"], - ["ぽ", "p"], - ["ぁ", "x"], - ["ぃ", "x"], - ["ぅ", "x"], - ["ぇ", "x"], - ["ぉ", "x"], - ["っ", "t"], // ddskk 16.2ではxがデフォルトだが、SKK-JISYO.Lなどでは撥音便の用語はtで収録されているため。'いt'等。 - ["ゃ", "x"], - ["ゅ", "x"], - ["ょ", "x"], - ["ゎ", "x"] + static ref KANA_ROM_MAP: BTreeMap = btreemap![ + ['あ', 'a'], + ['い', 'i'], + ['う', 'u'], + ['え', 'e'], + ['お', 'o'], + ['か', 'k'], + ['き', 'k'], + ['く', 'k'], + ['け', 'k'], + ['こ', 'k'], + ['さ', 's'], + ['し', 's'], + ['す', 's'], + ['せ', 's'], + ['そ', 's'], + ['た', 't'], + ['ち', 't'], + ['つ', 't'], + ['て', 't'], + ['と', 't'], + ['な', 'n'], + ['に', 'n'], + ['ぬ', 'n'], + ['ね', 'n'], + ['の', 'n'], + ['は', 'h'], + ['ひ', 'h'], + ['ふ', 'h'], + ['へ', 'h'], + ['ほ', 'h'], + ['ま', 'm'], + ['み', 'm'], + ['む', 'm'], + ['め', 'm'], + ['も', 'm'], + ['や', 'y'], + ['ゆ', 'y'], + ['よ', 'y'], + ['ら', 'r'], + ['り', 'r'], + ['る', 'r'], + ['れ', 'r'], + ['ろ', 'r'], + ['わ', 'w'], + ['ゐ', 'x'], + ['ゑ', 'x'], + ['を', 'w'], + ['ん', 'n'], + ['が', 'g'], + ['ぎ', 'g'], + ['ぐ', 'g'], + ['げ', 'g'], + ['ご', 'g'], + ['ざ', 'z'], + ['じ', 'z'], // ddskkでは'じ'が送り仮名の場合'j'として処理するのがデフォルト値だが、SKK-JISYO.S等ではzの送り仮名を用いることが多いのでこちらを用いる。 + ['ず', 'z'], + ['ぜ', 'z'], + ['ぞ', 'z'], + ['だ', 'd'], + ['ぢ', 'd'], + ['づ', 'd'], + ['で', 'd'], + ['ど', 'd'], + ['ば', 'b'], + ['び', 'b'], + ['ぶ', 'b'], + ['べ', 'b'], + ['ぼ', 'b'], + ['ぱ', 'p'], + ['ぴ', 'p'], + ['ぷ', 'p'], + ['ぺ', 'p'], + ['ぽ', 'p'], + ['ぁ', 'x'], + ['ぃ', 'x'], + ['ぅ', 'x'], + ['ぇ', 'x'], + ['ぉ', 'x'], + ['っ', 't'], // ddskk 16.2ではxがデフォルトだが、SKK-JISYO.Lなどでは撥音便の用語はtで収録されているため。'いt'等。 + ['ゃ', 'x'], + ['ゅ', 'x'], + ['ょ', 'x'], + ['ゎ', 'x'] ]; } impl KanaFormChanger { @@ -242,7 +242,7 @@ impl KanaFormChanger { /// ひらがな一文字からローマ字の最初のアルファベット一文字を返す。 /// ddskkのskk-rom-kana-vectorの対応。 /// - pub(crate) fn kana_to_okuri_prefix(kana: &str) -> Option<&str> { + pub(crate) fn kana_to_okuri_prefix(kana: &char) -> Option { KANA_ROM_MAP.get(kana).copied() } } @@ -295,9 +295,9 @@ mod tests { #[test] fn kana_to_okuri_prefix() { - assert_eq!(Some("r"), KanaFormChanger::kana_to_okuri_prefix("り")); - assert_eq!(Some("s"), KanaFormChanger::kana_to_okuri_prefix("す")); - assert_eq!(Some("w"), KanaFormChanger::kana_to_okuri_prefix("わ")); + assert_eq!(Some('r'), KanaFormChanger::kana_to_okuri_prefix(&'り')); + assert_eq!(Some('s'), KanaFormChanger::kana_to_okuri_prefix(&'す')); + assert_eq!(Some('w'), KanaFormChanger::kana_to_okuri_prefix(&'わ')); } #[test] diff --git a/src/lib.rs b/src/lib.rs index 075e039..ff5faad 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -6,14 +6,14 @@ extern crate sequence_trie; extern crate serde; #[macro_use] extern crate serde_derive; +extern crate nom; extern crate xkbcommon; use crate::command_handler::ConfigurableCommandHandler; use crate::command_handler::Instruction; use crate::config::CskkConfig; use crate::cskkstate::CskkState; -use crate::dictionary::candidate::Candidate; -use crate::dictionary::file_dictionary::FileDictionary; +use crate::dictionary::Candidate; use crate::dictionary::{ confirm_candidate, get_all_candidates, numeric_entry_count, numeric_string_count, purge_candidate, replace_numeric_string, to_composite_to_numeric_dict_key, CskkDictionary, @@ -168,7 +168,11 @@ pub fn skk_context_set_dictionaries_rs( /// 内部状態なので、Rust libが使用することを想定しない。 /// pub fn skk_context_get_current_to_composite_rs(context: &CskkContext) -> String { - context.current_state_ref().get_composite_key() + context + .current_state_ref() + .get_composite_key() + .get_to_composite() + .to_string() } /// @@ -371,8 +375,8 @@ impl CskkContext { /// 現在のraw_to_compositeから変換候補をリストにして、変換候補を指すポインタを0に戻す。 /// fn update_candidate_list(&mut self) { - let raw_to_composite = self.current_state_ref().get_composite_key(); - let candidates = get_all_candidates(&self.dictionaries, &raw_to_composite); + let composite_key = self.current_state_ref().get_composite_key(); + let candidates = get_all_candidates(&self.dictionaries, &composite_key); self.current_state().set_new_candidate_list(candidates); } @@ -384,8 +388,13 @@ impl CskkContext { .get_current_candidate() { let current_candidate = current_candidate.to_owned(); + let composite_key = self + .current_state_ref() + .get_candidate_list() + .get_current_to_composite() + .to_owned(); for cskkdict in self.dictionaries.iter_mut() { - purge_candidate(cskkdict, ¤t_candidate); + purge_candidate(cskkdict, &composite_key, ¤t_candidate); } } else { log::warn!( @@ -405,9 +414,13 @@ impl CskkContext { .get_current_candidate() { let current_candidate = current_candidate.to_owned(); - + let composite_key = self + .current_state_ref() + .get_candidate_list() + .get_current_to_composite() + .to_owned(); for cskkdict in self.dictionaries.iter_mut() { - confirm_candidate(cskkdict, ¤t_candidate); + confirm_candidate(cskkdict, &composite_key, ¤t_candidate); } let composited_okuri = self.kana_form_changer.adjust_kana_string( @@ -495,12 +508,14 @@ impl CskkContext { current_state.composition_mode = CompositionMode::Direct; let numeric_count = numeric_entry_count(&confirmed); + if numeric_count != 0 && numeric_count == numeric_string_count( current_state .get_candidate_list() - .get_current_to_composite(), + .get_current_to_composite() + .get_to_composite(), ) { // 変換する文字列の数字が確定文字列の数字代理と同数含まれる場合(numeric entry)を辞書登録する。 @@ -517,9 +532,9 @@ impl CskkContext { let mut candidates = vec![]; for output in outputs { candidates.push(Candidate::new( - Arc::new(dict_key.clone()), + dict_key.get_to_composite().to_string(), !self.current_state_ref().get_okuri_string().is_empty(), - Arc::new(confirmed.to_owned()), + confirmed.to_owned(), None, output, )); @@ -529,14 +544,13 @@ impl CskkContext { } else { // numeric entryではない普通の変換候補としてconfirmedを追加する。 let candidates = vec![Candidate::new( - Arc::new( - current_state - .get_candidate_list() - .get_current_to_composite() - .to_string(), - ), + current_state + .get_candidate_list() + .get_current_to_composite() + .get_to_composite() + .to_string(), !self.current_state_ref().get_okuri_string().is_empty(), - Arc::new(confirmed.to_owned()), + confirmed.to_owned(), None, confirmed, )]; @@ -1200,8 +1214,9 @@ impl CskkContext { // remove that from key and enter composition selection mode. let mut done = false; let composite_key = self.current_state_ref().get_composite_key(); + let raw_to_composite = composite_key.get_to_composite(); for suffix in &self.config.auto_start_henkan_keywords.clone() { - if !done && !composite_key.eq(suffix) && composite_key.ends_with(suffix) { + if !done && !raw_to_composite.eq(suffix) && raw_to_composite.ends_with(suffix) { // suffix matched the current composite_key's end // Now remove suffix from composite_key and put it to postfix. for _ in 0..suffix.chars().count() { diff --git a/tests/data/dictionaries/strict_okuri.dat b/tests/data/dictionaries/strict_okuri.dat new file mode 100644 index 0000000..182d42b --- /dev/null +++ b/tests/data/dictionaries/strict_okuri.dat @@ -0,0 +1,2 @@ +ほs /欲/乾/干/補/保/[せ/干/乾/]/[す/欲/]/[さ/補/保/]/ +おくt /送/贈/[って/贈/]/ \ No newline at end of file diff --git a/tests/tests.rs b/tests/tests.rs index 047d34e..f58c988 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -1196,7 +1196,9 @@ fn semicolon_entry() { .build(dict_file); let reader = BufReader::new(decoder); for line in reader.lines().flatten() { - assert_eq!(line.chars().filter(|x| x.eq(&';')).count(), 0); + if !line.starts_with(";;") { + assert_eq!(line.chars().filter(|x| x.eq(&';')).count(), 0); + } } } @@ -1278,3 +1280,30 @@ fn preconversion_clear_and_input() { InputMode::Hiragana, ); } + +#[test] +fn strict_okuri_entry() { + init_test_logger(); + let dictpath = "tests/data/dictionaries/strict_okuri.dat"; + let user_dict = CskkDictionary::new_user_dict(dictpath, "utf-8").unwrap(); + let mut context = test_context_with_dictionaries(vec![Arc::new(user_dict)]); + transition_check( + &mut context, + CompositionMode::Direct, + InputMode::Hiragana, + "H o S e", + "▼干せ", + "", + InputMode::Hiragana, + ); + skk_context_reset_rs(&mut context); + transition_check( + &mut context, + CompositionMode::Direct, + InputMode::Hiragana, + "O k u T t e", + "▼贈って", + "", + InputMode::Hiragana, + ); +}