Skip to content

Commit

Permalink
reproduce magpie kwg
Browse files Browse the repository at this point in the history
  • Loading branch information
andy-k committed Mar 30, 2024
1 parent b2fa4f0 commit 476f47e
Show file tree
Hide file tree
Showing 2 changed files with 115 additions and 17 deletions.
71 changes: 54 additions & 17 deletions src/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ struct StatesDefragger<'a> {
}

impl StatesDefragger<'_> {
fn defrag(&mut self, mut p: u32) {
fn defrag<const WOLGES_MODE: bool>(&mut self, mut p: u32) {
loop {
let prev = self.prev_indexes[p as usize];
if prev == 0 {
Expand All @@ -202,32 +202,53 @@ impl StatesDefragger<'_> {
if self.destination[p as usize] != 0 {
return;
}
let mut initial_num_written = self.num_written;
// temp value to break self-cycles.
self.destination[p as usize] = !0;
let mut write_p = p;
if !WOLGES_MODE {
// non-wolges mode reserves the space first.
loop {
self.num_written += 1;
p = self.states[p as usize].next_index;
if p == 0 {
break;
}
}
p = write_p;
}
let mut num = 0u32;
loop {
num += 1;
let a = self.states[p as usize].arc_index;
if a != 0 {
self.defrag(a);
self.defrag::<WOLGES_MODE>(a);
}
p = self.states[p as usize].next_index;
if p == 0 {
break;
}
}
if WOLGES_MODE {
initial_num_written = self.num_written;
}
self.destination[write_p as usize] = 0;
for ofs in 0..num {
// prefer earlier index, so dawg part does not point to gaddag part
if self.destination[write_p as usize] != 0 {
break;
}
self.destination[write_p as usize] = self.num_written + ofs;
if WOLGES_MODE || ofs == 0 {
self.destination[write_p as usize] = initial_num_written + ofs;
// non-wolges mode does not merge tail nodes.
}
write_p = self.states[write_p as usize].next_index;
}
// Always += num even if some nodes are necessarily duplicated due to sharing by different prev_nodes.
self.num_written += num;
if WOLGES_MODE {
// non-wolges mode already reserves the space.
self.num_written += num;
}
}

// encoding: little endian of
Expand Down Expand Up @@ -268,8 +289,8 @@ impl StatesDefragger<'_> {
0,
);
match build_format {
BuildFormat::DawgOnly => (),
BuildFormat::Gaddawg => {
BuildFormat::DawgOnly | BuildFormat::DawgOnlyMagpie => (),
BuildFormat::Gaddawg | BuildFormat::GaddawgMagpie => {
self.write_node(
&mut ret[4..],
gaddag_start_state,
Expand Down Expand Up @@ -322,6 +343,8 @@ fn gen_prev_indexes(states: &[State]) -> Vec<u32> {
/// Selects which graph(s) `build` produces and how the resulting nodes are laid out.
///
/// The two `*Magpie` variants build the same graphs as their plain counterparts, but
/// `build` lays them out with `defrag::<false>` (non-wolges mode) and an all-zero
/// `prev_indexes` table instead of `gen_prev_indexes`. Non-wolges mode reserves space
/// for a node list before descending into its arcs and does not merge tail nodes; the
/// command-line help describes the output as a "bigger magpie-style kwg".
pub enum BuildFormat {
/// DAWG only: the GADDAG start state is 0 and `num_written` starts at 1.
DawgOnly,
/// DAWG plus a GADDAG built from `gen_machine_drowwords`; `num_written` starts at 2.
Gaddawg,
/// Same graph as `DawgOnly`, laid out in non-wolges (magpie) mode.
DawgOnlyMagpie,
/// Same graphs as `Gaddawg`, laid out in non-wolges (magpie) mode.
GaddawgMagpie,
}

// machine_words must be sorted and unique.
Expand All @@ -345,13 +368,14 @@ pub fn build(
states_finder: &mut states_finder,
};
let dawg_start_state = match build_format {
BuildFormat::DawgOnly | BuildFormat::Gaddawg => {
state_maker.make_dawg(machine_words, 0, false)
}
BuildFormat::DawgOnly
| BuildFormat::Gaddawg
| BuildFormat::DawgOnlyMagpie
| BuildFormat::GaddawgMagpie => state_maker.make_dawg(machine_words, 0, false),
};
let gaddag_start_state = match build_format {
BuildFormat::DawgOnly => 0,
BuildFormat::Gaddawg => state_maker.make_dawg(
BuildFormat::DawgOnly | BuildFormat::DawgOnlyMagpie => 0,
BuildFormat::Gaddawg | BuildFormat::GaddawgMagpie => state_maker.make_dawg(
&gen_machine_drowwords(machine_words),
dawg_start_state,
true,
Expand All @@ -360,19 +384,32 @@ pub fn build(

let mut states_defragger = StatesDefragger {
states: &states,
prev_indexes: &gen_prev_indexes(&states),
prev_indexes: &match build_format {
BuildFormat::DawgOnly | BuildFormat::Gaddawg => gen_prev_indexes(&states),
BuildFormat::DawgOnlyMagpie | BuildFormat::GaddawgMagpie => vec![0u32; states.len()],
},
destination: &mut vec![0u32; states.len()],
num_written: match build_format {
BuildFormat::DawgOnly => 1,
BuildFormat::Gaddawg => 2,
BuildFormat::DawgOnly | BuildFormat::DawgOnlyMagpie => 1,
BuildFormat::Gaddawg | BuildFormat::GaddawgMagpie => 2,
},
};
states_defragger.destination[0] = !0; // useful for empty lexicon
states_defragger.defrag(dawg_start_state);
match build_format {
BuildFormat::DawgOnly => (),
BuildFormat::DawgOnly | BuildFormat::Gaddawg => {
states_defragger.defrag::<true>(dawg_start_state)
}
BuildFormat::DawgOnlyMagpie | BuildFormat::GaddawgMagpie => {
states_defragger.defrag::<false>(dawg_start_state)
}
}
match build_format {
BuildFormat::DawgOnly | BuildFormat::DawgOnlyMagpie => (),
BuildFormat::Gaddawg => {
states_defragger.defrag(gaddag_start_state);
states_defragger.defrag::<true>(gaddag_start_state);
}
BuildFormat::GaddawgMagpie => {
states_defragger.defrag::<false>(gaddag_start_state);
}
}
states_defragger.destination[0] = 0; // useful for empty lexicon
Expand Down
61 changes: 61 additions & 0 deletions src/main_build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,66 @@ fn do_lang<AlphabetMaker: Fn() -> alphabet::Alphabet>(
)?)?;
Ok(true)
}
"-kwg-magpie" => {
make_writer(&args[3])?.write_all(&build::build(
build::BuildFormat::GaddawgMagpie,
&read_machine_words(
&alphabet::AlphabetReader::new_for_words(&make_alphabet()),
&read_to_string(&mut make_reader(&args[2])?)?,
)?,
)?)?;
Ok(true)
}
"-kwg-magpie-dawg" => {
make_writer(&args[3])?.write_all(&build::build(
build::BuildFormat::DawgOnlyMagpie,
&read_machine_words(
&alphabet::AlphabetReader::new_for_words(&make_alphabet()),
&read_to_string(&mut make_reader(&args[2])?)?,
)?,
)?)?;
Ok(true)
}
"-kwg-magpie-alpha" => {
make_writer(&args[3])?.write_all(&build::build(
build::BuildFormat::DawgOnlyMagpie,
&build::make_alphagrams(&read_machine_words(
&alphabet::AlphabetReader::new_for_words(&make_alphabet()),
&read_to_string(&mut make_reader(&args[2])?)?,
)?),
)?)?;
Ok(true)
}
"-kwg-magpie-score" => {
make_writer(&args[3])?.write_all(&build::build(
build::BuildFormat::GaddawgMagpie,
&read_machine_words(
&alphabet::AlphabetReader::new_for_word_scores(&make_alphabet()),
&read_to_string(&mut make_reader(&args[2])?)?,
)?,
)?)?;
Ok(true)
}
"-kwg-magpie-score-dawg" => {
make_writer(&args[3])?.write_all(&build::build(
build::BuildFormat::DawgOnlyMagpie,
&read_machine_words(
&alphabet::AlphabetReader::new_for_word_scores(&make_alphabet()),
&read_to_string(&mut make_reader(&args[2])?)?,
)?,
)?)?;
Ok(true)
}
"-kwg-magpie-score-alpha" => {
make_writer(&args[3])?.write_all(&build::build(
build::BuildFormat::DawgOnlyMagpie,
&build::make_alphagrams(&read_machine_words(
&alphabet::AlphabetReader::new_for_word_scores(&make_alphabet()),
&read_to_string(&mut make_reader(&args[2])?)?,
)?),
)?)?;
Ok(true)
}
"-macondo" => {
let alphabet = make_alphabet();
let kwg = kwg::Kwg::from_bytes_alloc(&std::fs::read(&args[2])?);
Expand Down Expand Up @@ -304,6 +364,7 @@ fn main() -> error::Returns<()> {
english-kwg-score-alpha CSW21.txt CSW21.kad
english-kwg-score-dawg CSW21.txt outfile.dwg
same as above but with representative same-score tiles
(english-kwg can also be english-kwg-magpie for bigger magpie-style kwg)
(english can also be catalan, french, german, norwegian, polish, slovene,
spanish, yupik)
input/output files can be \"-\" (not advisable for binary files)"
Expand Down

0 comments on commit 476f47e

Please sign in to comment.