Skip to content

Commit 98a4880

Browse files
committed
Bug fix: lru_cache support for Python 3.6.x and Python 3.7.x
1 parent 7840805 commit 98a4880

File tree

1 file changed

+241
-8
lines changed

1 file changed

+241
-8
lines changed

geolang/__init__.py

Lines changed: 241 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,244 @@
1+
# -*- coding: utf-8 -*-
2+
13
"""
2-
initialization package imports
4+
Georgian Language Toolkit for Python 3
5+
6+
Source: <https://github.com/Lh4cKg/simple-geolang-toolkit>
37
"""
48

5-
from geolang.geolang import (
6-
__author__,
7-
__version__,
8-
encode_slugify,
9-
encode_text,
10-
GeoLangToolKit,
11-
)
9+
import re
10+
from typing import Dict, Any, Iterable, Union, List, Tuple
11+
from functools import lru_cache, partial
12+
from unicodedata import normalize
13+
14+
15+
# Package metadata.
__author__ = 'Lasha Gogua'
__email__ = 'Lh4cKg@gmail.com'
__version__ = '0.2.1'

# Names exported by a star-import of this module.
__all__ = [
    'GeoLangToolKit',
    'encode_slugify',
    'encode_text',
]
20+
21+
22+
class GeoLangToolKit(object):
    """
    Transliterate text between the Georgian and the Latin script and build
    URL-style slugs from such text.

    The Georgian alphabet is held in ``ka_script`` and the Latin spelling of
    each letter, position by position, in ``latin_script``.
    """

    def __init__(
            self,
            latin_script: Union[str, List[str], Tuple[str, ...], None] = None
    ) -> None:
        """
        Romanization of Georgian is the process of transliterating the Georgian
        language from the Georgian script into the Latin script.

        The default table follows the National system but writes the
        ejective consonants WITHOUT the apostrophe, so the following pairs
        share one Latin spelling:
            თ / ტ - t
            ქ / კ - k
            ფ / პ - p
            ც / წ - ts
            ჩ / ჭ - ch
        and the remaining multi-letter spellings are:
            ჟ - zh
            ღ - gh
            შ - sh
            ძ - dz
            ხ - kh

        :param latin_script: Latin spellings in the order of the Georgian
            alphabet. Either a list/tuple of strings (used as given, not
            validated) or a comma separated string with at least one entry
            per Georgian letter. Any other value selects the default table.

        :raises ValueError: if a comma separated string holds fewer entries
            than there are Georgian letters.
        """
        self.ka_script: str = 'აბგდევზთიკლმნოპჟრსტუფქღყშჩცძწჭხჯჰ'
        self.latin_script: Iterable[str]

        if isinstance(latin_script, (list, tuple)):
            self.latin_script = latin_script
        elif isinstance(latin_script, str):
            # Count the comma separated entries, not the characters of the
            # raw string: a long string can still hold too few entries.
            entries = latin_script.split(',')
            if len(entries) < len(self.ka_script):
                raise ValueError(
                    'Wrong latin script characters, available list, '
                    'tuple or comma separated string with at least '
                    f'{len(self.ka_script)} entries.'
                )
            self.latin_script = entries
        else:
            self.latin_script = (
                'a', 'b', 'g', 'd', 'e', 'v', 'z', 't', 'i', 'k', 'l', 'm',
                'n', 'o', 'p', 'zh', 'r', 's', 't', 'u', 'p', 'k', 'gh', 'q',
                'sh', 'ch', 'ts', 'dz', 'ts', 'ch', 'kh', 'j', 'h'
            )

    @property
    def ka2lat_map(self) -> Dict[str, str]:
        """
        Map of Georgian letters to their Latin spelling.

        Built on first access and stored on the instance, so no class-level
        cache keeps instances alive.

        :return characters map of georgian to latin
        """
        cached = getattr(self, '_ka2lat_map', None)
        if cached is None:
            cached = dict(zip(self.ka_script, self.latin_script))
            self._ka2lat_map = cached
        return cached

    @property
    def lat2ka_map(self) -> Dict[str, str]:
        """
        Map of Latin spellings to Georgian letters.

        When several Georgian letters share one Latin spelling, the letter
        that comes last in ``ka_script`` wins. Built on first access and
        stored on the instance.

        :return georgian characters map of latin to georgian
        """
        cached = getattr(self, '_lat2ka_map', None)
        if cached is None:
            cached = dict(zip(self.latin_script, self.ka_script))
            self._lat2ka_map = cached
        return cached

    def lat2ka(self, value: str, na_value: str = None) -> str:
        """
        convert the given string from latin into georgian chars

        The text is scanned left to right and, at every position, the
        longest Latin spelling known to ``lat2ka_map`` is consumed, so
        multi-letter spellings such as ``sh`` or ``kh`` become one letter.

        :param value: Georgian or Latin text
        :param na_value: replacement for a character that has no mapping;
            when it is None or empty the character is kept unchanged.

        :return: the converted text

        >>> # example
        >>> GeoLangToolKit().lat2ka('laSas uyvars ana da piToni lol ))')
        'ლაSას უyვარს ანა და ფიTონი ლოლ ))'
        >>> GeoLangToolKit().lat2ka('shen')
        'შენ'
        """
        mapping = self.lat2ka_map
        longest = max(map(len, mapping), default=1)
        total = len(value)

        chars = []
        i = 0
        while i < total:
            # Try the longest candidate first so 'sh' beats 's' + 'h'.
            for size in range(min(longest, total - i), 0, -1):
                token = value[i:i + size]
                if token in mapping:
                    chars.append(mapping[token])
                    i += size
                    break
            else:
                chars.append(na_value if na_value else value[i])
                i += 1

        return ''.join(chars)

    def ka2lat(self, value: str, na_value: str = None) -> str:
        """
        convert the given name from georgian into latin chars

        :param value: Georgian or Latin text
        :param na_value: replacement for a character that has no mapping;
            when it is None or empty the character is kept unchanged.

        :return: the converted text

        >>> # example
        >>> GeoLangToolKit().ka2lat('მე მიყვარს ანა!')
        'me miqvars ana!'
        """
        mapping = self.ka2lat_map
        return ''.join(
            mapping.get(char, na_value if na_value else char)
            for char in value
        )

    def _replace_str(self, ka2latin: bool, match: 're.Match') -> str:
        """
        ``re.sub`` callback: return the Latin spelling of the matched
        character when ``ka2latin`` is set and a mapping exists, otherwise
        return the matched text unchanged.
        """
        # The annotation is quoted so that it is never evaluated; the class
        # then also imports on interpreters whose ``re`` has no ``Match``.
        char = match.group()

        if ka2latin:
            return self.ka2lat_map.get(char, char)

        return char

    @staticmethod
    def _slugify(value: Any) -> str:
        """
        Converts to ASCII (characters without an ASCII decomposition are
        dropped). Converts spaces to hyphens.
        Removes characters that
        aren't alphanumerics, underscores, or hyphens.
        Also strips leading and trailing whitespace.

        :param value: text, UTF-8 bytes, or any object convertible with str()
        :return: the slug
        """
        if isinstance(value, bytes):
            s = str(value, 'utf-8', 'strict')
        else:
            s = str(value)
        s = normalize('NFKD', s).encode('ascii', 'ignore').decode('ascii')
        return re.sub(r'[-\s]+', '-', re.sub(r'[^\w\s-]', '', s).strip())

    def encode_slugify(self, value: str, ka2latin: bool = False) -> str:
        """
        Convert Georgian letters to latin if 'ka2latin' is True.
        Convert spaces to hyphens.
        Remove characters that aren't alphanumerics, underscores, or hyphens.
        Convert to lowercase. Also strip leading and trailing whitespace.

        Non-ASCII word characters are kept as they are.

        :param value: Georgian or Latin text (UTF-8 bytes are decoded)
        :param ka2latin: if True, value with Georgian letters will be converted
            to Latin letters, default False.
        :return: the slug

        >>> tk = GeoLangToolKit()
        >>> tk.encode_slugify("მე'მიყვარს-ანი და ის/ჩემი ცხოვბრებაა! ჩ", True)
        'memiqvars-ani-da-ischemi-tskhovbrebaa-ch'
        >>> tk.encode_slugify("პითონი და ჯანგო")
        'პითონი-და-ჯანგო'
        >>> tk.encode_slugify("更新时间")
        '更新时间'
        """
        if isinstance(value, bytes):
            value = str(value, 'utf-8', 'strict')
        else:
            value = str(value)

        # Hand every character that is not ASCII alphanumeric, whitespace or
        # a hyphen to the callback, which transliterates the Georgian ones.
        replace_str = partial(self._replace_str, ka2latin)
        s = re.sub(r'[^a-zA-Z0-9\s-]', replace_str, value)

        cleaned = re.sub(r'[^\w\s-]', '', s).strip().lower()
        return re.sub(r'[-\s]+', '-', cleaned)

    def encode_text(
            self,
            value: str,
            ka2latin: bool = True,
            latin2ka: bool = False,
            na_value: str = None) -> str:
        """
        Transliterate ``value`` in one direction. ``latin2ka`` takes
        precedence when both flags are set.

        :param value: Georgian or Latin text (UTF-8 bytes are decoded)
        :param ka2latin: if True, value with Georgian letters will be converted
            to Latin letters, default True.
        :param latin2ka: if True, value with Latin letters will be converted
            to Georgian letters, default False.
        :param na_value: N/A value if could not find character, default None.
        :return: georgian or latin letters
        :raises ValueError: if both ``ka2latin`` and ``latin2ka`` are false.
        """
        if not ka2latin and not latin2ka:
            raise ValueError(
                'Missing required argument, '
                'Choose one `ka2latin` or `latin2ka`'
            )

        if isinstance(value, bytes):
            value = str(value, 'utf-8', 'strict')
        else:
            value = str(value)

        if latin2ka:
            return self.lat2ka(value, na_value)

        return self.ka2lat(value, na_value)
240+
241+
242+
# Shared toolkit built with the default Latin table; the two module-level
# helpers below are its bound methods.
instance: GeoLangToolKit = GeoLangToolKit()
encode_slugify, encode_text = instance.encode_slugify, instance.encode_text

0 commit comments

Comments
 (0)