Skip to content

Commit

Permalink
Remove self.injected_charlist
Browse files (browse the repository at this point in the history)
  • Loading branch information
TimKoornstra committed Mar 7, 2024
1 parent 9c4d49b commit b364a63
Showing 1 changed file with 4 additions and 6 deletions.
10 changes: 4 additions & 6 deletions src/data/loader.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -32,7 +32,6 @@ def __init__(self,
  self.config = config

  # TODO: Make this more clear
- self.injected_charlist = charlist
  self.charlist = charlist

  self.evaluation_list = None
Expand Down Expand Up @@ -73,9 +72,8 @@ def _process_raw_data(self):
  )

  # Determine the character list for the tokenizer
- if self.injected_charlist and not self.config['replace_final_layer']:
+ if self.charlist and not self.config['replace_final_layer']:
  logging.info('Using injected charlist')
- self.charlist = self.injected_charlist
  else:
  self.charlist = sorted(list(characters))

Expand Down Expand Up @@ -193,9 +191,9 @@ def create_data(self, characters, labels, partitions, partition_name,
  # unsupported characters in the training set as well
  if not include_unsupported_chars \
  and (partition_name != 'train'
- or self.injected_charlist):
+ or self.charlist):
  for char in ground_truth:
- if char not in self.injected_charlist and \
+ if char not in self.charlist and \
  char not in characters:
  logging.warning("Unsupported character: "
  f"{char} in {ground_truth}. "
Expand All @@ -217,7 +215,7 @@ def create_data(self, characters, labels, partitions, partition_name,
  partitions[partition_name].append(file_name)
  labels[partition_name].append(ground_truth)
  processed_files.append([file_name, ground_truth])
- if (not self.injected_charlist or
+ if (not self.charlist or
  self.config['replace_final_layer']) \
  and partition_name == 'train':
  characters = characters.union(
Expand Down

0 comments on commit b364a63

Please sign in to comment.