Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion panphon/_panphon.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ def _read_table(self, filename: str) -> tuple[
def _read_weights(self, filename: str = os.path.join(
'data', 'feature_weights.csv')
) -> List[float]:
with files('panphon').joinpath(filename).open() as f:
with files('panphon').joinpath(filename).open(encoding='utf-8') as f:
df = pd.read_csv(f)
weights = df.iloc[0].astype(float).tolist()
return weights
Expand Down
71 changes: 41 additions & 30 deletions panphon/bin/generate_ipa_all.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,7 @@ def __init__(self, form, features):

def __repr__(self):
"""Output string representation of Segment."""
return 'Segment("{}", {})'.format(self.form,
repr(self.features))
return 'Segment("{}", {})'.format(self.form, repr(self.features))

def feature_vector(self, feature_names):
"""Return feature vector for segment.
Expand All @@ -39,7 +38,7 @@ def feature_vector(self, feature_names):


class Diacritic(object):
"""An object encapsulating a diacritics properties."""
"""An object encapsulating a diacritic's properties."""

def __init__(self, marker, position, conditions, exclude, content):
"""Construct a diacritic object.
Expand All @@ -57,7 +56,7 @@ def __init__(self, marker, position, conditions, exclude, content):
is applied
"""
self.marker = marker
assert position in ['pre', 'post']
assert position in ["pre", "post"]
self.position = position
self.exclude = exclude
self.conditions = conditions
Expand All @@ -66,6 +65,9 @@ def __init__(self, marker, position, conditions, exclude, content):
def match(self, segment):
if segment.form not in self.exclude:
for condition in self.conditions:
# Skip empty conditions - they should not match all segments
if not condition:
continue
if set(condition.items()) <= set(segment.features.items()):
return True
return False
Expand All @@ -77,10 +79,10 @@ def apply(self, segment):
new_seg = copy.deepcopy(segment)
for k, v in self.content.items():
new_seg.features[k] = v
if self.position == 'post':
new_seg.form = '{}{}'.format(new_seg.form, self.marker)
if self.position == "post":
new_seg.form = "{}{}".format(new_seg.form, self.marker)
else:
new_seg.form = '{}{}'.format(self.marker, new_seg.form)
new_seg.form = "{}{}".format(self.marker, new_seg.form)
return new_seg
else:
return None
Expand All @@ -103,11 +105,11 @@ def apply(self, segment):

def read_ipa_bases(ipa_bases):
segments = []
with open(ipa_bases, 'r', encoding='utf-8') as f:
with open(ipa_bases, "r", encoding="utf-8") as f:
dictreader = csv.DictReader(f)
for record in dictreader:
form = record['ipa']
features = {k: v for k, v in record.items() if k != 'ipa'}
form = record["ipa"]
features = {k: v for k, v in record.items() if k != "ipa"}
segments.append(Segment(form, features))
return segments

Expand All @@ -116,42 +118,42 @@ def parse_dia_defs(dia_defs):
with open(dia_defs, "r", encoding="utf-8") as f:
defs = yaml.load(f.read(), Loader=yaml.FullLoader)
diacritics = {}
for dia in defs['diacritics']:
if 'exclude' in dia:
exclude = dia['exclude']
for dia in defs["diacritics"]:
if "exclude" in dia:
exclude = dia["exclude"]
else:
exclude = []
diacritics[dia['name']] = Diacritic(dia['marker'], dia['position'],
dia['conditions'], exclude,
dia['content'])
diacritics[dia["name"]] = Diacritic(
dia["marker"], dia["position"], dia["conditions"], exclude, dia["content"]
)
combinations = []
for comb in defs['combinations']:
combinations.append(Combination(diacritics, comb['name'],
comb['combines']))
for comb in defs["combinations"]:
combinations.append(Combination(diacritics, comb["name"], comb["combines"]))
return diacritics, combinations


def sort_all_segments(sort_order, all_segments):
all_segments_list = list(all_segments)
with open(sort_order, 'r', encoding='utf-8') as f:
with open(sort_order, "r", encoding="utf-8") as f:
field_order = reversed(yaml.load(f.read(), Loader=yaml.FullLoader))
for field in field_order:
all_segments_list.sort(key=lambda seg: seg.features[field['name']],
reverse=field['reverse'])
all_segments_list.sort(
key=lambda seg: seg.features[field["name"]], reverse=field["reverse"]
)
return all_segments_list


def write_ipa_all(ipa_bases, ipa_all, all_segments, sort_order):
with open(ipa_bases, 'r', encoding='utf-8') as f:
with open(ipa_bases, "r", encoding="utf-8") as f:
reader = csv.reader(f)
fieldnames = next(reader)
with open(ipa_all, 'w', encoding='utf-8', newline='') as f:
with open(ipa_all, "w", encoding="utf-8", newline="") as f:
writer = csv.DictWriter(f, fieldnames=fieldnames)
writer.writeheader()
all_segments_list = sort_all_segments(sort_order, all_segments)
for segment in all_segments_list:
fields = copy.copy(segment.features)
fields['ipa'] = segment.form
fields["ipa"] = segment.form
writer.writerow(fields)


Expand All @@ -175,13 +177,22 @@ def main(ipa_bases, ipa_all, dia_defs, sort_order):
def cli_main():
"""Entry point for the generate_ipa_all script."""
parser = argparse.ArgumentParser()
parser.add_argument('bases', help='File containing IPA bases (ipa_bases.csv)')
parser.add_argument('all', help='File to which all IPA segments is to be written (ipa_all.csv)')
parser.add_argument('-d', '--dia', required=True, help='Diacritic definition file (default=diacritic_definitions.yml)')
parser.add_argument('-s', '--sort-order', required=True, help='File definiting sort order.')
parser.add_argument("bases", help="File containing IPA bases (ipa_bases.csv)")
parser.add_argument(
"all", help="File to which all IPA segments is to be written (ipa_all.csv)"
)
parser.add_argument(
"-d",
"--dia",
required=True,
help="Diacritic definition file (default=diacritic_definitions.yml)",
)
parser.add_argument(
"-s", "--sort-order", required=True, help="File definiting sort order."
)
args = parser.parse_args()
main(args.bases, args.all, args.dia, args.sort_order)


if __name__ == '__main__':
if __name__ == "__main__":
cli_main()
2 changes: 1 addition & 1 deletion panphon/collapse.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def __init__(

def _load_table(self, tablename: str):
fn = os.path.join('data', tablename)
with files('panphon').joinpath(fn).open('r') as f:
with files('panphon').joinpath(fn).open('r', encoding='utf-8') as f:
rules = []
table = yaml.load(f.read(), Loader=yaml.FullLoader)
for rule in table:
Expand Down
Loading