Skip to content

Commit

Permalink
Don't use Dataset::serialize_data
Browse files: browse the repository at this point in the history
  • Loading branch information
dorian-K committed Dec 6, 2024
1 parent 9fd5417 commit 645e702
Showing 1 changed file with 2 additions and 5 deletions.
7 changes: 2 additions & 5 deletions users/dorian_koch/jobs/DatasetToTextDictJob.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def run(self):
vocab = Vocabulary.create_vocab_from_labels(dataset.labels[self.take_vocab_from_key])
else:
assert dataset.can_serialize_data(self.data_key)
- vocab = None
+ vocab = Vocabulary.create_vocab_from_labels(dataset.labels[self.data_key])

with uopen(self.out_dictionary, "wt") as out:
out.write("{\n")
Expand All @@ -77,10 +77,7 @@ def run(self):
if seq_idx % 10000 == 0:
logging.info(f"seq_idx {seq_idx}")
key = dataset.get_tag(seq_idx)
- if vocab:
- orth = vocab.get_seq_labels(dataset.get_data(seq_idx, self.data_key))
- else:
- orth = dataset.serialize_data(key=self.data_key, data=dataset.get_data(seq_idx, self.data_key))
+ orth = vocab.get_seq_labels(dataset.get_data(seq_idx, self.data_key))
if self.vocab_to_words:
orth = self.vocab_to_words(orth)
out.write("%r: %r,\n" % (key, orth))
Expand Down

0 comments on commit 645e702

Please sign in to comment.