Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Bulgarian language implementation #520

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions num2words/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@

from __future__ import unicode_literals

from . import (lang_AM, lang_AR, lang_AZ, lang_CZ, lang_DE, lang_DK, lang_EN,
lang_EN_IN, lang_EN_NG, lang_EO, lang_ES, lang_ES_CO,
from . import (lang_AM, lang_AR, lang_AZ, lang_BG, lang_CZ, lang_DE, lang_DK,
lang_EN, lang_EN_IN, lang_EN_NG, lang_EO, lang_ES, lang_ES_CO,
lang_ES_GT, lang_ES_NI, lang_ES_VE, lang_FA, lang_FI, lang_FR,
lang_FR_BE, lang_FR_CH, lang_FR_DZ, lang_HE, lang_HU, lang_ID,
lang_IS, lang_IT, lang_JA, lang_KN, lang_KO, lang_KZ, lang_LT,
Expand All @@ -30,6 +30,7 @@
'am': lang_AM.Num2Word_AM(),
'ar': lang_AR.Num2Word_AR(),
'az': lang_AZ.Num2Word_AZ(),
'bg': lang_BG.Num2Word_BG(),
'cz': lang_CZ.Num2Word_CZ(),
'en': lang_EN.Num2Word_EN(),
'en_IN': lang_EN_IN.Num2Word_EN_IN(),
Expand Down
342 changes: 342 additions & 0 deletions num2words/lang_BG.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,342 @@
# -*- coding: utf-8 -*-
# Copyright (c) 2003, Taro Ogawa. All Rights Reserved.
# Copyright (c) 2013, Savoir-faire Linux inc. All Rights Reserved.

# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA 02110-1301 USA
from num2words.base import Num2Word_Base
from num2words.currency import parse_currency_parts, prefix_currency
from num2words.utils import splitbyx, get_digits

# Prefix for negative numbers. Note that the value already ends with a space.
MINUS_PREFIX_WORD = "минус "
# NOTE(review): not referenced by any code visible in this module; presumably
# the word placed between the integer and fractional parts - confirm.
FLOAT_INFIX_WORD = ' цяло '
# Word ("and") that _int2word inserts between parts of a spelled number.
CONJUNCTION = 'и'

# Spelling of the number 0.
ZERO = 'нула'

# Units 1-9. _int2word indexes each tuple with a gender selector:
#   [0] default (neuter) form,
#   [1] masculine form (used for scales above thousand and in currency mode),
#   [2] feminine form.
# NOTE(review): the trailing boolean is not read by any code visible here;
# it appears to mark digits whose spelling varies with gender - confirm.
ONES = {
1: ('едно', 'един', 'една', True),
2: ('две', 'два', 'две', True),
3: ('три', 'три', 'три', False),
4: ('четири', 'четири', 'четири', False),
5: ('пет', 'пет', 'пет', False),
6: ('шест', 'шест', 'шест', False),
7: ('седем', 'седем', 'седем', False),
8: ('осем', 'осем', 'осем', False),
9: ('девет', 'девет', 'девет', False),
}

# Numbers 10-19, keyed by the units digit (0 -> 10, 1 -> 11, ...).
# Each value is a one-element tuple; _int2word reads element [0].
TENS = {
0: ('десет',),
1: ('единадесет',),
2: ('дванадесет',),
3: ('тринадесет',),
4: ('четиринадесет',),
5: ('петнадесет',),
6: ('шестнадесет',),
7: ('седемнадесет',),
8: ('осемнадесет',),
9: ('деветнадесет',),
}

# Multiples of ten from 20 to 90, keyed by the tens digit.
# Each value is a one-element tuple; _int2word reads element [0].
TWENTIES = {
2: ('двадесет',),
3: ('тридесет',),
4: ('четиридесет',),
5: ('петдесет',),
6: ('шейсет',),
7: ('седемдесет',),
8: ('осемдесет',),
9: ('деветдесет',),
}

# Multiples of one hundred from 100 to 900, keyed by the hundreds digit.
# Each value is a one-element tuple; _int2word reads element [0].
HUNDREDS = {
1: ('сто',),
2: ('двеста',),
3: ('триста',),
4: ('четиристотин',),
5: ('петстотин',),
6: ('шестстотин',),
7: ('седемстотин',),
8: ('осемстотин',),
9: ('деветстотин',),
}

# Scale nouns keyed by the zero-based index of a three-digit group counted
# from the right, so key ``k`` names 10^(3*k).  The keys must be contiguous
# because _int2word looks up SCALE[group_index] for every group of the number.
#
# Each value is ``(singular, count_form, plural, is_feminine)`` as consumed by
# ``pluralize``; ``is_feminine`` also selects the feminine spelling of the
# digits 1 and 2 that precede the noun.
SCALE = {
    0: ('', '', '', False),
    1: ('хиляда', 'хиляди', 'хиляди', True),  # 10^3
    2: ('милион', 'милиона', 'милиони', False),  # 10^6
    3: ('милиард', 'милиарда', 'милиарди', False),  # 10^9
    4: ('трилион', 'трилиона', 'трилиони', False),  # 10^12
    5: ('квадрилион', 'квадрилиона', 'квадрилиони', False),  # 10^15
    6: ('квинтилион', 'квинтилиона', 'квинтилиони', False),  # 10^18
    7: ('секстилион', 'секстилиона', 'секстилиони', False),  # 10^21
    8: ('септилион', 'септилиона', 'септилиони', False),  # 10^24
    9: ('октилион', 'октилиона', 'октилиони', False),  # 10^27
    10: ('нонилион', 'нонилиона', 'нонилиони', False),  # 10^30
    11: ('децилион', 'децилиона', 'децилиони', False),  # 10^33
    12: ('ундецилион', 'ундецилиона', 'ундецилиони', False),  # 10^36
    13: ('дуодецилион', 'дуодецилиона', 'дуодецилиони', False),  # 10^39
    14: ('тридецилион', 'тридецилиона', 'тридецилиони', False),  # 10^42
    15: ('кваддецилион', 'кваддецилиона', 'кваддецилиони', False),  # 10^45
    16: ('квиндецилион', 'квиндецилиона', 'квиндецилиони', False),  # 10^48
    17: ('сексдецилион', 'сексдецилиона', 'сексдецилиони', False),  # 10^51
    18: ('септендецилион', 'септендецилиона', 'септендецилиони',
         False),  # 10^54
    19: ('октодецилион', 'октодецилиона', 'октодецилиони', False),  # 10^57
    20: ('новемдецилион', 'новемдецилиона', 'новемдецилиони',
         False),  # 10^60
    21: ('вигинтилион', 'вигинтилиона', 'вигинтилиони', False),  # 10^63
}

# Currency nouns keyed by currency code. Each value is a pair
# (major-unit forms, minor-unit forms); to_currency unpacks the pair and
# passes each forms tuple to pluralize, which reads [0], [1], [2] and the
# trailing boolean ([-1], treated there as an "is feminine" flag).
CURRENCY_FORMS = {
'BGN': (
('лев', 'лева', 'лева', False),
('стотинка', 'стотинки', 'стотинки', False)
),
'EUR': (
('евро', 'евро', 'евро', False),
('цент', 'цента', 'цента', False)
),
'USD': (
('долар', 'долара', 'долара', False),
('цент', 'цента', 'цента', False)
)
}

# Used by to_year: maps the last cardinal word of a year to its replacement.
# Words missing from this table get the suffix "а" appended by to_year.
ORDINAL_YEARS = {
'едно': 'първа',
'един': 'първа',
'една': 'първа',
'две': 'втора',
'три': 'трета',
'четири': 'четвърта',
'пет': 'пета',
'шест': 'шеста',
'седем': 'седма',
'осем': 'осма',
'девет': 'девета',
}

# Used by to_ordinal_num: suffix for the numeric ordinal, keyed by the last
# decimal digit. Each value is indexed with int(feminine), i.e.
# [0] is the default ending and [1] the feminine ending.
ORDINAL_ENDING = {
0: ('ти', 'та'),
1: ('ви', 'ва'),
2: ('ри', 'ра'),
3: ('ти', 'та'),
4: ('ти', 'та'),
5: ('ти', 'та'),
6: ('ти', 'та'),
7: ('ми', 'ма'),
8: ('ми', 'ма'),
9: ('ти', 'та'),
}

# Used by to_ordinal: maps the last cardinal word to its ordinal form.
# Words missing from this table that contain "десет" get the suffix "и"
# appended by to_ordinal; any other word is left unchanged.
ORDINAL = {
'едно': 'първи',
'един': 'първи',
'една': 'първи',
'две': 'втори',
'три': 'трети',
'четири': 'четвърти',
'пет': 'пети',
'шест': 'шести',
'седем': 'седми',
'осем': 'осми',
'девет': 'девети',
'десет': 'десети',
}


class Num2Word_BG(Num2Word_Base):

negword = 'минус'

def __init__(self):
self.is_currency = False

def set_high_numwords(self, *args):
    """Accept and ignore any arguments; this converter registers nothing.

    Scale names are read from the module-level SCALE table instead.
    """
    return None

def merge(self, curr, next):
    """Ignore both arguments and return None.

    No method visible in this class calls ``merge``; the words are
    assembled directly in ``_int2word``.
    """
    return None

def pluralize(self, number, forms):
is_feminine = forms[-1]

if number == 1:
return forms[0]

if is_feminine:
form = 2
else:
if number % 10 == 1 and not self.is_currency:
form = 0
else:
form = 1

return forms[form]

def setup(self):
pass

def _int2word(self, number, feminine=False):
    """Spell an integer in Bulgarian words.

    The decimal digits are split into three-digit groups. Each group is
    spelled as hundreds, tens and units and is followed by its scale noun
    from ``SCALE`` (thousand, million, ...).

    Args:
        number (int): Value to spell. Negative values are prefixed with
            the minus word.
        feminine (bool): Force the feminine spelling of the units digit.

    Returns:
        str: The words separated by single spaces.
    """
    if number < 0:
        # MINUS_PREFIX_WORD carries its own trailing space; strip it so the
        # join yields exactly one separator.  The gender request is passed
        # on so negative numbers are spelled like positive ones.
        return ' '.join([
            MINUS_PREFIX_WORD.strip(),
            self._int2word(abs(number), feminine),
        ])

    if number == 0:
        return ZERO

    words = []
    # Stored on the instance as before, in case other code reads it.
    self.chunks = list(splitbyx(str(number), 3))
    total_chunks = len(self.chunks)
    chunk_len = total_chunks

    for chunk in self.chunks:
        # chunk_len is the zero-based position of this group counted from
        # the right, i.e. the key of its scale noun in SCALE.
        chunk_len -= 1
        digit_right, digit_mid, digit_left = get_digits(chunk)

        if digit_left > 0:
            # NOTE(review): the conjunction is added whenever this group
            # has no tens/units; lower groups are not inspected, so a
            # later non-zero group gets a conjunction of its own too.
            if words and digit_mid == 0 and digit_right == 0:
                words.append(CONJUNCTION)
            words.append(HUNDREDS[digit_left][0])

        if digit_mid > 1:
            if words and digit_right == 0:
                words.append(CONJUNCTION)
            words.append(TWENTIES[digit_mid][0])

        if digit_mid == 1:
            # 10-19 are single words and therefore always the last word of
            # the group: join them to whatever precedes them, and never
            # emit a leading conjunction when nothing precedes them.
            if words:
                words.append(CONJUNCTION)
            words.append(TENS[digit_right][0])
        elif digit_right > 0:
            if words:
                words.append(CONJUNCTION)

            # The gender of the units word follows the scale noun.
            if feminine or SCALE[chunk_len][-1]:
                gender_type = 2
            elif chunk_len > 1 or self.is_currency:
                # chunk_len > 1 means a scale above thousand.
                gender_type = 1
            else:
                gender_type = 0

            # Exactly one thousand (1000-1999) is spelled with the scale
            # noun alone, without a leading "one".
            lone_thousand = (
                total_chunks == 2 and chunk_len == 1 and
                digit_left == 0 and digit_mid == 0 and digit_right == 1
            )
            if not lone_thousand:
                words.append(ONES[digit_right][gender_type])

        if chunk_len > 0 and chunk != 0:
            words.append(self.pluralize(chunk, SCALE[chunk_len]))

    return ' '.join(words)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Using a boolean `feminine` parameter is not a good choice here, because Bulgarian has three grammatical genders: masculine, feminine and neuter. A boolean cannot express all three, so extending it later would break backward compatibility.
Like here.
I suggest adding a `gender` parameter instead.

def to_cardinal(self, number, feminine=False):
    """Spell a number, with an optional fractional part, in words.

    Args:
        number: Integer, float or numeric string. A comma is accepted as
            the decimal separator.
        feminine (bool): Use the feminine spelling of the units digit.

    Returns:
        str: The spelled number. For a fractional value the integer part,
        the point word and the fractional digits are joined by spaces.

    Raises:
        ValueError: If either side of the decimal separator is not a plain
            integer literal (for example exponent notation).
    """
    text = str(number).replace(',', '.')
    if '.' not in text:
        return self._int2word(int(text), feminine)

    left, right = text.split('.')

    # int("-0") is 0, so the sign has to be taken from the text itself or
    # values such as -0.5 would be spelled as positive.
    negative = left.startswith('-')
    whole_words = self._int2word(abs(int(left)), feminine)
    if negative:
        whole_words = '%s %s' % (MINUS_PREFIX_WORD.strip(), whole_words)

    # int() discards leading zeros, which would make 1.05 read like 1.5;
    # spell each leading zero explicitly.  An all-zero fraction keeps its
    # single-word spelling.
    significant = right.lstrip('0')
    fraction_words = []
    if significant:
        fraction_words.extend([ZERO] * (len(right) - len(significant)))
    fraction_words.append(self._int2word(int(right), feminine))

    # NOTE(review): this class never assigns ``pointword`` itself; fall
    # back to the module's infix word when no such attribute exists.
    point_word = getattr(self, 'pointword', FLOAT_INFIX_WORD.strip())

    return u'%s %s %s' % (
        whole_words,
        point_word,
        ' '.join(fraction_words)
    )

def to_currency(self, val, currency='BGN', cents=True, separator=' и',
                adjective=False):
    """Spell a monetary amount in words.

    Args:
        val: Numeric value
        currency (str): Currency code; must be a key of CURRENCY_FORMS
        cents (bool): Spell the minor units in words when True, otherwise
            use the terse numeric form
        separator (str): Text placed between the major and minor parts
        adjective (bool): Prefix currency name with adjective
    Returns:
        str: Formatted string
    Raises:
        NotImplementedError: If ``currency`` is not in CURRENCY_FORMS.
    """
    left, right, is_negative = parse_currency_parts(val)

    try:
        cr1, cr2 = CURRENCY_FORMS[currency]
    except KeyError:
        raise NotImplementedError(
            'Currency code "%s" not implemented for "%s"' %
            (currency, self.__class__.__name__))

    if adjective and currency in self.CURRENCY_ADJECTIVES:
        cr1 = prefix_currency(
            self.CURRENCY_ADJECTIVES[currency],
            cr1
        )

    minus_str = "%s " % self.negword if is_negative else ""

    # Currency mode changes how _int2word and pluralize pick their forms.
    # It must only be active for the duration of this call: leaving it on
    # would alter later to_cardinal() results on the same instance.
    previous_mode = self.is_currency
    self.is_currency = True
    try:
        if cents:
            cents_str = self.to_cardinal(right, True)
        else:
            # Use the terse string that was computed for this purpose
            # rather than the raw integer.
            cents_str = self._cents_terse(right, currency)

        return u'%s%s %s%s %s %s' % (
            minus_str,
            self.to_cardinal(left, feminine=False),
            self.pluralize(left, cr1),
            separator,
            cents_str,
            self.pluralize(right, cr2)
        )
    finally:
        self.is_currency = previous_mode

def to_year(self, number, **kwargs):
    """Spell a year: the cardinal words with the last one made ordinal.

    Args:
        number: Year to spell; checked with ``verify_ordinal`` first.
        **kwargs: Accepted and ignored.

    Returns:
        str: The cardinal spelling whose final word is replaced through
        ORDINAL_YEARS, or suffixed with "а" when it has no entry there.
    """
    self.verify_ordinal(number)

    tokens = self._int2word(number, False).split(" ")
    final_word = tokens[-1].lower()
    tokens[-1] = ORDINAL_YEARS.get(final_word, final_word + "а")

    return " ".join(tokens)

def to_ordinal(self, number, feminine=False):
    """Spell a number as an ordinal.

    Only the last word of the cardinal spelling is changed.

    Args:
        number: Value to spell; checked with ``verify_ordinal`` first.
        feminine (bool): Produce the feminine ordinal (ending in "а")
            instead of the default one (ending in "и").

    Returns:
        str: The cardinal words with the last one replaced by its ordinal
        form. A last word that is neither in the lookup table nor contains
        "десет" is left unchanged.
    """
    self.verify_ordinal(number)
    outwords = self.to_cardinal(number).split(" ")
    lastword = outwords[-1].lower()

    # ORDINAL_YEARS holds the feminine counterparts of the ORDINAL entries,
    # so it doubles as the feminine lookup table.  Previously the
    # ``feminine`` argument was accepted but had no effect.
    if feminine:
        table, suffix = ORDINAL_YEARS, "а"
    else:
        table, suffix = ORDINAL, "и"

    if lastword in table:
        lastword = table[lastword]
    elif "десет" in lastword:
        lastword = lastword + suffix

    outwords[-1] = lastword

    return " ".join(outwords)

def to_ordinal_num(self, number, feminine=False):
    """Return the numeric ordinal, e.g. ``1`` -> ``"1-ви"``.

    Args:
        number (int): Value to format.
        feminine (bool): Use the feminine ending instead of the default.

    Returns:
        str: The digits of ``number``, a hyphen and the ordinal ending
        taken from ORDINAL_ENDING.
    """
    gender_idx = int(feminine)

    # 11-19 are spelled as single "...надесет" words, so they take the same
    # ending as the round tens rather than the ending of their last digit
    # (11 is "11-ти", not "11-ви").
    if 11 <= number % 100 <= 19:
        ending_key = 0
    else:
        ending_key = number % 10

    return str(number) + '-' + ORDINAL_ENDING[ending_key][gender_idx]