From 6bf4ad20abdb61d2f2fd7a2661612d29bc62628e Mon Sep 17 00:00:00 2001 From: Aidan Pine Date: Fri, 9 Aug 2024 13:42:03 -0700 Subject: [PATCH 1/2] fix: import Mapping from typing module. In Python 3.8 you cannot subscript the collections.abc.Mapping class, so importing from typing is more stable across Python versions. See https://stackoverflow.com/questions/59955751/abcmeta-object-is-not-subscriptable-when-trying-to-annotate-a-hash-variable --- panphon/segment.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/panphon/segment.py b/panphon/segment.py index ab42fc1..63027e0 100644 --- a/panphon/segment.py +++ b/panphon/segment.py @@ -2,8 +2,7 @@ from __future__ import annotations -from collections.abc import Iterator, Iterable, Mapping -from typing import TypeVar +from typing import Iterator, Mapping, TypeVar import regex as re T = TypeVar('T') From c696f92cad20bb6ecf412fe8af568973ca180b79 Mon Sep 17 00:00:00 2001 From: Eric Joanis Date: Fri, 23 Aug 2024 10:40:05 -0400 Subject: [PATCH 2/2] Three small fixes for panphon on Windows and Python 3.12 1) On Python 3.12, I was getting infinite recursion in featuretable.py because sort_segments() used self.segments which called segments() and thus sort_segments() again. Using the underlying self._segments instead in sort_segments() removes the infinite recursion. 2) Also in featuretable.py, `with open(fn) as f:` on line 79 assumes utf-8 on Linux, but it assumes cp-1252 on my Windows machine. Declaring the encoding explicitly fixed the problem. For code portability, it's unfortunately and very annoyingly required to always declare the encoding when opening a file in text mode in Python. There are other places in the code where text-mode open statements don't declare the encoding. I'm not sure why those did not cause problems in my tests. 3) Minor warning from Python 3.12: in the docstring, the \ in \w has to be escaped so that `help(Segment)` gets printed correctly. 
--- panphon/featuretable.py | 5 ++--- panphon/segment.py | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/panphon/featuretable.py b/panphon/featuretable.py index 0800b79..7659ee3 100644 --- a/panphon/featuretable.py +++ b/panphon/featuretable.py @@ -34,7 +34,7 @@ def segments(self): return self._segments def sort_segments(self): - self.segments.sort(key=self.segment_key) + self._segments.sort(key=self.segment_key) @staticmethod def segment_key(segment_tuple): @@ -76,7 +76,7 @@ def normalize(data: str) -> str: def _read_bases(self, fn: str, weights): fn = pkg_resources.resource_filename(__name__, fn) segments = [] - with open(fn) as f: + with open(fn, encoding="utf8") as f: reader = csv.reader(f) header = next(reader) names = header[1:] @@ -585,4 +585,3 @@ def vector_list_to_word(self, tensor, xsampa=False,fuzzy_search=False): word = self.xsampa.convert(word) return word - diff --git a/panphon/segment.py b/panphon/segment.py index 63027e0..85f536d 100644 --- a/panphon/segment.py +++ b/panphon/segment.py @@ -12,7 +12,7 @@ class Segment(Mapping[str, int]): :param names list[str]: An ordered list of feature names. :param feature dict[str, int]: name-feature pairs for specified features. - :param ftstr str: A string, each /(+|0|-)\w+/ sequence of which is interpreted as a feature specification. + :param ftstr str: A string, each /(+|0|-)\\w+/ sequence of which is interpreted as a feature specification. :param weights list[float]: An ordered list of feature weights/saliences. """ def __init__(self, names: list[str], features: dict[str, int]={}, ftstr: str='', weights: "list[float]"=[]):