Skip to content

Commit

Permalink
fix language codes
Browse files Browse the repository at this point in the history
  • Loading branch information
jan-kubica committed Jul 27, 2023
1 parent 872510d commit 3377ee5
Show file tree
Hide file tree
Showing 9 changed files with 351 additions and 299 deletions.
Binary file added .DS_Store
Binary file not shown.
8 changes: 4 additions & 4 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -73,15 +73,15 @@ Besides the numerical argument, there are two main optional arguments, ``to:`` a
* ``year``
* ``currency``

**lang:** The language in which to convert the number. Supported values are:
**lang:** The language in which to convert the number, given as an ISO 639-1 language code, optionally followed by an underscore and a region code (e.g. ``en_GB``). Supported values are:

* ``en`` (English, default)
* ``am`` (Amharic)
* ``ar`` (Arabic)
* ``az`` (Azerbaijani)
* ``cz`` (Czech)
* ``cs`` (Czech)
* ``de`` (German)
* ``dk`` (Danish)
* ``da`` (Danish)
* ``en_GB`` (English - Great Britain)
* ``en_IN`` (English - India)
* ``en_NG`` (English - Nigeria)
Expand All @@ -104,7 +104,7 @@ Besides the numerical argument, there are two main optional arguments, ``to:`` a
* ``ja`` (Japanese)
* ``kn`` (Kannada)
* ``ko`` (Korean)
* ``kz`` (Kazakh)
* ``kk`` (Kazakh)
* ``lt`` (Lithuanian)
* ``lv`` (Latvian)
* ``no`` (Norwegian)
Expand Down
162 changes: 102 additions & 60 deletions num2words/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,70 +17,112 @@

from __future__ import unicode_literals

from . import (lang_AM, lang_AR, lang_AZ, lang_CZ, lang_DE, lang_DK, lang_EN,
lang_EN_IN, lang_EN_NG, lang_EO, lang_ES, lang_ES_CO,
lang_ES_GT, lang_ES_NI, lang_ES_VE, lang_FA, lang_FI, lang_FR,
lang_FR_BE, lang_FR_CH, lang_FR_DZ, lang_HE, lang_HU, lang_ID,
lang_IS, lang_IT, lang_JA, lang_KN, lang_KO, lang_KZ, lang_LT,
lang_LV, lang_NL, lang_NO, lang_PL, lang_PT, lang_PT_BR,
lang_RO, lang_RU, lang_SL, lang_SR, lang_SV, lang_TE, lang_TG,
lang_TH, lang_TR, lang_UK, lang_VI)
from . import (
lang_AM,
lang_AR,
lang_AZ,
lang_CS,
lang_DA,
lang_DE,
lang_EN,
lang_EN_IN,
lang_EN_NG,
lang_EO,
lang_ES,
lang_ES_CO,
lang_ES_GT,
lang_ES_NI,
lang_ES_VE,
lang_FA,
lang_FI,
lang_FR,
lang_FR_BE,
lang_FR_CH,
lang_FR_DZ,
lang_HE,
lang_HU,
lang_ID,
lang_IS,
lang_IT,
lang_JA,
lang_KK,
lang_KN,
lang_KO,
lang_LT,
lang_LV,
lang_NL,
lang_NO,
lang_PL,
lang_PT,
lang_PT_BR,
lang_RO,
lang_RU,
lang_SL,
lang_SR,
lang_SV,
lang_TE,
lang_TG,
lang_TH,
lang_TR,
lang_UK,
lang_VI,
)

CONVERTER_CLASSES = {
'am': lang_AM.Num2Word_AM(),
'ar': lang_AR.Num2Word_AR(),
'az': lang_AZ.Num2Word_AZ(),
'cz': lang_CZ.Num2Word_CZ(),
'en': lang_EN.Num2Word_EN(),
'en_IN': lang_EN_IN.Num2Word_EN_IN(),
'en_NG': lang_EN_NG.Num2Word_EN_NG(),
'fa': lang_FA.Num2Word_FA(),
'fr': lang_FR.Num2Word_FR(),
'fr_CH': lang_FR_CH.Num2Word_FR_CH(),
'fr_BE': lang_FR_BE.Num2Word_FR_BE(),
'fr_DZ': lang_FR_DZ.Num2Word_FR_DZ(),
'de': lang_DE.Num2Word_DE(),
'fi': lang_FI.Num2Word_FI(),
'eo': lang_EO.Num2Word_EO(),
'es': lang_ES.Num2Word_ES(),
'es_CO': lang_ES_CO.Num2Word_ES_CO(),
'es_GT': lang_ES_GT.Num2Word_ES_GT(),
'es_NI': lang_ES_NI.Num2Word_ES_NI(),
'es_VE': lang_ES_VE.Num2Word_ES_VE(),
'id': lang_ID.Num2Word_ID(),
'ja': lang_JA.Num2Word_JA(),
'kn': lang_KN.Num2Word_KN(),
'ko': lang_KO.Num2Word_KO(),
'kz': lang_KZ.Num2Word_KZ(),
'lt': lang_LT.Num2Word_LT(),
'lv': lang_LV.Num2Word_LV(),
'pl': lang_PL.Num2Word_PL(),
'ro': lang_RO.Num2Word_RO(),
'ru': lang_RU.Num2Word_RU(),
'sl': lang_SL.Num2Word_SL(),
'sr': lang_SR.Num2Word_SR(),
'sv': lang_SV.Num2Word_SV(),
'no': lang_NO.Num2Word_NO(),
'dk': lang_DK.Num2Word_DK(),
'pt': lang_PT.Num2Word_PT(),
'pt_BR': lang_PT_BR.Num2Word_PT_BR(),
'he': lang_HE.Num2Word_HE(),
'it': lang_IT.Num2Word_IT(),
'vi': lang_VI.Num2Word_VI(),
'tg': lang_TG.Num2Word_TG(),
'th': lang_TH.Num2Word_TH(),
'tr': lang_TR.Num2Word_TR(),
'nl': lang_NL.Num2Word_NL(),
'uk': lang_UK.Num2Word_UK(),
'te': lang_TE.Num2Word_TE(),
'hu': lang_HU.Num2Word_HU(),
'is': lang_IS.Num2Word_IS()
"am": lang_AM.Num2Word_AM(),
"ar": lang_AR.Num2Word_AR(),
"az": lang_AZ.Num2Word_AZ(),
"cs": lang_CS.Num2Word_CS(),
"en": lang_EN.Num2Word_EN(),
"en_IN": lang_EN_IN.Num2Word_EN_IN(),
"en_NG": lang_EN_NG.Num2Word_EN_NG(),
"fa": lang_FA.Num2Word_FA(),
"fr": lang_FR.Num2Word_FR(),
"fr_CH": lang_FR_CH.Num2Word_FR_CH(),
"fr_BE": lang_FR_BE.Num2Word_FR_BE(),
"fr_DZ": lang_FR_DZ.Num2Word_FR_DZ(),
"de": lang_DE.Num2Word_DE(),
"fi": lang_FI.Num2Word_FI(),
"eo": lang_EO.Num2Word_EO(),
"es": lang_ES.Num2Word_ES(),
"es_CO": lang_ES_CO.Num2Word_ES_CO(),
"es_GT": lang_ES_GT.Num2Word_ES_GT(),
"es_NI": lang_ES_NI.Num2Word_ES_NI(),
"es_VE": lang_ES_VE.Num2Word_ES_VE(),
"id": lang_ID.Num2Word_ID(),
"ja": lang_JA.Num2Word_JA(),
"kn": lang_KN.Num2Word_KN(),
"ko": lang_KO.Num2Word_KO(),
"kk": lang_KK.Num2Word_KK(),
"lt": lang_LT.Num2Word_LT(),
"lv": lang_LV.Num2Word_LV(),
"pl": lang_PL.Num2Word_PL(),
"ro": lang_RO.Num2Word_RO(),
"ru": lang_RU.Num2Word_RU(),
"sl": lang_SL.Num2Word_SL(),
"sr": lang_SR.Num2Word_SR(),
"sv": lang_SV.Num2Word_SV(),
"no": lang_NO.Num2Word_NO(),
"da": lang_DA.Num2Word_DA(),
"pt": lang_PT.Num2Word_PT(),
"pt_BR": lang_PT_BR.Num2Word_PT_BR(),
"he": lang_HE.Num2Word_HE(),
"it": lang_IT.Num2Word_IT(),
"vi": lang_VI.Num2Word_VI(),
"tg": lang_TG.Num2Word_TG(),
"th": lang_TH.Num2Word_TH(),
"tr": lang_TR.Num2Word_TR(),
"nl": lang_NL.Num2Word_NL(),
"uk": lang_UK.Num2Word_UK(),
"te": lang_TE.Num2Word_TE(),
"hu": lang_HU.Num2Word_HU(),
"is": lang_IS.Num2Word_IS(),
}

CONVERTES_TYPES = ['cardinal', 'ordinal', 'ordinal_num', 'year', 'currency']
CONVERTES_TYPES = ["cardinal", "ordinal", "ordinal_num", "year", "currency"]


def num2words(number, ordinal=False, lang='en', to='cardinal', **kwargs):
def num2words(number, ordinal=False, lang="en", to="cardinal", **kwargs):
# We try the full language first
if lang not in CONVERTER_CLASSES:
# ... and then try only the first 2 letters
Expand All @@ -94,9 +136,9 @@ def num2words(number, ordinal=False, lang='en', to='cardinal', **kwargs):

# backwards compatible
if ordinal:
to = 'ordinal'
to = "ordinal"

if to not in CONVERTES_TYPES:
raise NotImplementedError()

return getattr(converter, 'to_{}'.format(to))(number, **kwargs)
return getattr(converter, "to_{}".format(to))(number, **kwargs)
123 changes: 60 additions & 63 deletions num2words/lang_CZ.py → num2words/lang_CS.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,95 +20,92 @@
from .base import Num2Word_Base
from .utils import get_digits, splitbyx

ZERO = ('nula',)
ZERO = ("nula",)

ONES = {
1: ('jedna',),
2: ('dva',),
3: ('tři',),
4: ('čtyři',),
5: ('pět',),
6: ('šest',),
7: ('sedm',),
8: ('osm',),
9: ('devět',),
1: ("jedna",),
2: ("dva",),
3: ("tři",),
4: ("čtyři",),
5: ("pět",),
6: ("šest",),
7: ("sedm",),
8: ("osm",),
9: ("devět",),
}

TENS = {
0: ('deset',),
1: ('jedenáct',),
2: ('dvanáct',),
3: ('třináct',),
4: ('čtrnáct',),
5: ('patnáct',),
6: ('šestnáct',),
7: ('sedmnáct',),
8: ('osmnáct',),
9: ('devatenáct',),
0: ("deset",),
1: ("jedenáct",),
2: ("dvanáct",),
3: ("třináct",),
4: ("čtrnáct",),
5: ("patnáct",),
6: ("šestnáct",),
7: ("sedmnáct",),
8: ("osmnáct",),
9: ("devatenáct",),
}

TWENTIES = {
2: ('dvacet',),
3: ('třicet',),
4: ('čtyřicet',),
5: ('padesát',),
6: ('šedesát',),
7: ('sedmdesát',),
8: ('osmdesát',),
9: ('devadesát',),
2: ("dvacet",),
3: ("třicet",),
4: ("čtyřicet",),
5: ("padesát",),
6: ("šedesát",),
7: ("sedmdesát",),
8: ("osmdesát",),
9: ("devadesát",),
}

HUNDREDS = {
1: ('sto',),
2: ('dvěstě',),
3: ('třista',),
4: ('čtyřista',),
5: ('pětset',),
6: ('šestset',),
7: ('sedmset',),
8: ('osmset',),
9: ('devětset',),
1: ("sto",),
2: ("dvěstě",),
3: ("třista",),
4: ("čtyřista",),
5: ("pětset",),
6: ("šestset",),
7: ("sedmset",),
8: ("osmset",),
9: ("devětset",),
}

THOUSANDS = {
1: ('tisíc', 'tisíce', 'tisíc'), # 10^3
2: ('milion', 'miliony', 'milionů'), # 10^6
3: ('miliarda', 'miliardy', 'miliard'), # 10^9
4: ('bilion', 'biliony', 'bilionů'), # 10^12
5: ('biliarda', 'biliardy', 'biliard'), # 10^15
6: ('trilion', 'triliony', 'trilionů'), # 10^18
7: ('triliarda', 'triliardy', 'triliard'), # 10^21
8: ('kvadrilion', 'kvadriliony', 'kvadrilionů'), # 10^24
9: ('kvadriliarda', 'kvadriliardy', 'kvadriliard'), # 10^27
10: ('quintillion', 'quintilliony', 'quintillionů'), # 10^30
1: ("tisíc", "tisíce", "tisíc"), # 10^3
2: ("milion", "miliony", "milionů"), # 10^6
3: ("miliarda", "miliardy", "miliard"), # 10^9
4: ("bilion", "biliony", "bilionů"), # 10^12
5: ("biliarda", "biliardy", "biliard"), # 10^15
6: ("trilion", "triliony", "trilionů"), # 10^18
7: ("triliarda", "triliardy", "triliard"), # 10^21
8: ("kvadrilion", "kvadriliony", "kvadrilionů"), # 10^24
9: ("kvadriliarda", "kvadriliardy", "kvadriliard"), # 10^27
10: ("quintillion", "quintilliony", "quintillionů"), # 10^30
}


class Num2Word_CZ(Num2Word_Base):
class Num2Word_CS(Num2Word_Base):
CURRENCY_FORMS = {
'CZK': (
('koruna', 'koruny', 'korun'), ('halíř', 'halíře', 'haléřů')
),
'EUR': (
('euro', 'euro', 'euro'), ('cent', 'centy', 'centů')
),
"CZK": (("koruna", "koruny", "korun"), ("halíř", "halíře", "haléřů")),
"EUR": (("euro", "euro", "euro"), ("cent", "centy", "centů")),
}

def setup(self):
self.negword = "mínus"
self.pointword = "celá"

def to_cardinal(self, number):
n = str(number).replace(',', '.')
if '.' in n:
left, right = n.split('.')
leading_zero_count = len(right) - len(right.lstrip('0'))
decimal_part = ((ZERO[0] + ' ') * leading_zero_count +
self._int2word(int(right)))
return u'%s %s %s' % (
n = str(number).replace(",", ".")
if "." in n:
left, right = n.split(".")
leading_zero_count = len(right) - len(right.lstrip("0"))
decimal_part = (ZERO[0] + " ") * leading_zero_count + self._int2word(
int(right)
)
return "%s %s %s" % (
self._int2word(int(left)),
self.pointword,
decimal_part
decimal_part,
)
else:
return self._int2word(int(n))
Expand Down Expand Up @@ -154,4 +151,4 @@ def _int2word(self, n):
if i > 0:
words.append(self.pluralize(x, THOUSANDS[i]))

return ' '.join(words)
return " ".join(words)
Loading

0 comments on commit 3377ee5

Please sign in to comment.