Skip to content

Commit

Permalink
Added more languages;
Browse files Browse the repository at this point in the history
Updated README.md
  • Loading branch information
Halvani committed May 25, 2024
1 parent 14babda commit a28208c
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 3 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -383,7 +383,7 @@ ws.pretty_print(ws.by_language(ws.Language.Arabic, letter_case=ws.LetterCase.Low
```

## Contribution
If you like this project, you are welcome to support it, e.g. by testing it or providing additional languages (there is a **lot** to do with regard to the [remaining languages](https://www.loc.gov/standards/iso639-2/php/code_list.php)). Feel free to fork the repository and create a pull request to suggest and collaborate on changes.
If you like this project, you are welcome to support it, e.g. by testing it or providing additional languages (there is a **lot** to do with regard to the [remaining languages](https://www.omniglot.com/writing/languages.htm)). Feel free to fork the repository and create a pull request to suggest and collaborate on changes.

## Disclaimer
Although this project has been carried out with great care, no liability is accepted for the completeness and accuracy of all the underlying data. The use of Alphabetic for integration into production systems is at your own risk!
Expand Down
11 changes: 10 additions & 1 deletion alphabetic/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ def update_lang_json_file(iso_name: str, script: list[str]) -> None:
elif iso_name in iso_639_3_language_code_db:
language_print_name = iso_639_3_language_code_db[iso_name]

print(f"✅ Updated json-file successfully!\nLanguage: {language_print_name};\nLanguage code: {iso_name}; Alphabet size: {len(created_dict[iso_name]['script'])} (characters).\nNote, in order to use this language, you must add the respective entry: {language_print_name} = '{iso_name}' to the enum class Language.")
print(f"✅ Updated json-file successfully!\nLanguage: {language_print_name}; Language code: {iso_name}; Alphabet size: {len(created_dict[iso_name]['script'])} (characters).\nNote, in order to use this language, you must add the respective entry: {language_print_name} = '{iso_name}' to the enum class Language.")
else:
print("❌ Specified language code: {iso_name} was not found in updated json file!")

Expand Down Expand Up @@ -272,6 +272,11 @@ class Language(Enum):
Danish = "dan", # Script type: Alphabet; Writing system: Latin (Danish alphabet), Danish Braille
Dungan = "dng", # Script type: Alphabet; Writing system: Cyrillic (official), Chinese characters (obsolete), Xiao'erjing (obsolete), Latin (historical)
Dutch = "nld", # Script type: Alphabet; Writing system: Latin (Dutch alphabet), Dutch Braille
Flemish = "dut", # Script type: Alphabet; Writing system: Latin (Dutch alphabet), Dutch Braille
Swiss_German = "gsw", # Script type: Alphabet; Writing system: Latin
Bavarian = "bar", # Script type: Alphabet; Writing system: Latin alphabet, Marcomannic (historically)
Cimbrian = "cim", # Script type: Alphabet; Writing system: Latin
Zeeuws = "zea", # Script type: Alphabet; Writing system: Zeelandic alphabet (Latin)
Dzongkha = "dzo", # Script type: Abugida; Writing system: Tibetan script, Dzongkha Braille
English = "eng", # Script type: Alphabet; Writing system: Latin script
Esperanto = "epo", # Script type: Alphabet; Writing system: Latin script (Esperanto alphabet), Esperanto Braille
Expand Down Expand Up @@ -367,8 +372,11 @@ class Language(Enum):
Turkmen = "tuk", # Script type: Alphabet; Writing system: Latin (Turkmen alphabet, official in Turkmenistan), Perso-Arabic, Cyrillic, Turkmen Braille
Arapaho = "arp", # Script type: Alphabet; Writing system: Latin
Istro_Romanian = "ruo", # Script type: Alphabet; Writing system: Latin
Vengo = "bav", # Script type: Alphabet; Writing system: Latin
Tuvan = "tyv", # Script type: Alphabet; Writing system: Cyrillic script
Twi = "twi", # Script type: Alphabet; Writing system: Latin
Elfdalian = "ovd", # Script type: Alphabet; Writing system: Latin (Elfdalian alphabet), Dalecarlian runes (until the 20th century)
Cornish = "cor", # Script type: Alphabet; Writing system: Latin alphabet
Luxembourgish = "ltz", # Script type: Alphabet; Writing system: Latin (Luxembourgish alphabet), Luxembourgish Braille
Ukrainian = "ukr", # Script type: Alphabet; Writing system: Cyrillic (Ukrainian alphabet), Ukrainian Braille
Uzbek = "uzb", # Script type: Alphabet; Writing system: Latin (Uzbek alphabet), Cyrillic, Perso-Arabic, Uzbek Braille (Uzbek alphabets)
Expand Down Expand Up @@ -573,6 +581,7 @@ def is_abjad(self, sequence: str, strip_spaces: bool = True) -> bool:
return self.is_writing_system(sequence, self.Abjad.__name__, strip_spaces)

def is_abugida(self, sequence: str, strip_spaces: bool = True) -> bool:
#TODO: Decomposition of abugida graphemes can be accomplished as follows: print(*list("ਸ੍ਰੀ ਅਕਾਲ")) # --> ਸ ੍ ਰ ੀ ਅ ਕ ਾ ਲ
return self.is_writing_system(sequence, self.Abugida.__name__, strip_spaces)

def is_syllabary(self, sequence: str, strip_spaces: bool = True) -> bool:
Expand Down
2 changes: 1 addition & 1 deletion alphabetic/data/alphabet.json

Large diffs are not rendered by default.

0 comments on commit a28208c

Please sign in to comment.