From ba4c928b60fc0e4911af96cc851691ddbdd9130c Mon Sep 17 00:00:00 2001
From: Isin Demirsahin <isin@google.com>
Date: Fri, 14 Feb 2025 08:05:39 -0800
Subject: [PATCH] Added Language as a Feature.

PiperOrigin-RevId: 726931209
---
 .../language_params/BUILD.bazel               |   2 +-
 .../natural_translit/language_params/en.py    |   4 +-
 .../natural_translit/phonology/BUILD.bazel    |   1 +
 .../phonology/features/BUILD.bazel            |  21 +++
 .../phonology/features/README.md              |  15 +-
 .../phonology/features/docs/language.md       |  70 ++++++++++
 .../phonology/features/language.py            | 128 ++++++++++++++++++
 .../phonology/features/language_test.py       |  33 +++++
 .../phonology/inventories/BUILD.bazel         |  12 +-
 .../inventories/docs/{x_mul.md => x_uni.md}   |   2 +-
 .../inventories/{x_mul.py => x_uni.py}        |   0
 .../{x_mul_test.py => x_uni_test.py}          |   2 +-
 .../phonology/phonological_symbol.py          |  39 ++++--
 .../phonology/phonological_symbol_test.py     |  29 +++-
 .../scripts/natural_translit/utils/feature.py |  28 +++-
 15 files changed, 354 insertions(+), 32 deletions(-)
 create mode 100644 nisaba/scripts/natural_translit/phonology/features/docs/language.md
 create mode 100644 nisaba/scripts/natural_translit/phonology/features/language.py
 create mode 100644 nisaba/scripts/natural_translit/phonology/features/language_test.py
 rename nisaba/scripts/natural_translit/phonology/inventories/docs/{x_mul.md => x_uni.md} (99%)
 rename nisaba/scripts/natural_translit/phonology/inventories/{x_mul.py => x_uni.py} (100%)
 rename nisaba/scripts/natural_translit/phonology/inventories/{x_mul_test.py => x_uni_test.py} (99%)

diff --git a/nisaba/scripts/natural_translit/language_params/BUILD.bazel b/nisaba/scripts/natural_translit/language_params/BUILD.bazel
index c2aa72f7..f52f3a01 100644
--- a/nisaba/scripts/natural_translit/language_params/BUILD.bazel
+++ b/nisaba/scripts/natural_translit/language_params/BUILD.bazel
@@ -42,7 +42,7 @@ py_library(
     name = "en",
     srcs = ["en.py"],
     deps = [
-        "//nisaba/scripts/natural_translit/phonology/inventories:x_mul",
+        "//nisaba/scripts/natural_translit/phonology/inventories:x_uni",
         "//nisaba/scripts/natural_translit/script:grapheme",
         "//nisaba/scripts/natural_translit/script/inventories:latn",
     ],
diff --git a/nisaba/scripts/natural_translit/language_params/en.py b/nisaba/scripts/natural_translit/language_params/en.py
index 517f24ae..8a9c7d16 100644
--- a/nisaba/scripts/natural_translit/language_params/en.py
+++ b/nisaba/scripts/natural_translit/language_params/en.py
@@ -14,7 +14,7 @@
 
 """Language parameters for English."""
 
-from nisaba.scripts.natural_translit.phonology.inventories import x_mul
+from nisaba.scripts.natural_translit.phonology.inventories import x_uni
 from nisaba.scripts.natural_translit.script import grapheme as g
 from nisaba.scripts.natural_translit.script.inventories import latn as l
 
@@ -22,7 +22,7 @@
 def _latn_inventory() -> g.Grapheme.Inventory:
   """Builds a grapheme inventory for English."""
   latn = l.LATN
-  ph = x_mul.PHONEMES
+  ph = x_uni.PHONEMES
   gr = g.Grapheme.Inventory(g.Grapheme.GR_FEATURES.script.latn, 'en')
   lowercase = [
       latn.a,
diff --git a/nisaba/scripts/natural_translit/phonology/BUILD.bazel b/nisaba/scripts/natural_translit/phonology/BUILD.bazel
index 6ec95f85..d7084cec 100644
--- a/nisaba/scripts/natural_translit/phonology/BUILD.bazel
+++ b/nisaba/scripts/natural_translit/phonology/BUILD.bazel
@@ -68,6 +68,7 @@ py_library(
     srcs = ["phonological_symbol.py"],
     deps = [
         "//nisaba/scripts/natural_translit/phonology/features:descriptive",
+        "//nisaba/scripts/natural_translit/phonology/features:language",
         "//nisaba/scripts/natural_translit/utils:expression",
         "//nisaba/scripts/natural_translit/utils:feature",
         "//nisaba/scripts/natural_translit/utils:inventory",
diff --git a/nisaba/scripts/natural_translit/phonology/features/BUILD.bazel b/nisaba/scripts/natural_translit/phonology/features/BUILD.bazel
index 1d2af0ec..a0c32971 100644
--- a/nisaba/scripts/natural_translit/phonology/features/BUILD.bazel
+++ b/nisaba/scripts/natural_translit/phonology/features/BUILD.bazel
@@ -37,3 +37,24 @@ py_test(
         "@io_abseil_py//absl/testing:absltest",
     ],
 )
+
+py_library(
+    name = "language",
+    srcs = ["language.py"],
+    deps = [
+        "//nisaba/scripts/natural_translit/utils:feature",
+        "//nisaba/scripts/natural_translit/utils:type_op",
+        requirement("pycountry"),
+    ],
+)
+
+py_test(
+    name = "language_test",
+    srcs = ["language_test.py"],
+    main = "language_test.py",
+    deps = [
+        ":language",
+        "//nisaba/scripts/natural_translit/utils:test_op",
+        "@io_abseil_py//absl/testing:absltest",
+    ],
+)
diff --git a/nisaba/scripts/natural_translit/phonology/features/README.md b/nisaba/scripts/natural_translit/phonology/features/README.md
index bbdf6e87..d81b7a72 100644
--- a/nisaba/scripts/natural_translit/phonology/features/README.md
+++ b/nisaba/scripts/natural_translit/phonology/features/README.md
@@ -2,11 +2,24 @@
 
 [TOC]
 
+## Language
+
+The language inventory is organized by language families. The depth and the
+granularity of the tree are pragmatically determined and can be updated as
+needed.
+
+For example, the Tamil-Kota branch of the Dravidian family is where the dental
+vs alveolar place becomes contrastive for /t̪/ and /t/, which has a direct
+impact on transliteration, so it is added as a node.
+
+[Family tree for the supported languages](https://github.com/google-research/nisaba/blob/main/nisaba/scripts/natural_translit/phonology/features/docs/language.md)
+
 ## Descriptive features
 
 This inventory defines a set of descriptive phonological features that can be
 used to build phonological profiles based on where a phonological symbol is
-located on an[IPA chart](https://en.wikipedia.org/wiki/International_Phonetic_Alphabet_chart).
+located on an
+[IPA chart](https://en.wikipedia.org/wiki/International_Phonetic_Alphabet_chart).
 
 ### How to build a feature profile
 
diff --git a/nisaba/scripts/natural_translit/phonology/features/docs/language.md b/nisaba/scripts/natural_translit/phonology/features/docs/language.md
new file mode 100644
index 00000000..ebdd90e4
--- /dev/null
+++ b/nisaba/scripts/natural_translit/phonology/features/docs/language.md
@@ -0,0 +1,70 @@
+# Languages
+
+Feature family tree for the supported
+[languages](https://github.com/google-research/nisaba/blob/main/nisaba/scripts/natural_translit/phonology/features/README.md#language).
+
+<!-- AUTO-GENERATED INVENTORY STRING STARTS HERE -->
+
+```dot
+graph {
+ordering="out"
+size = 12
+rankdir="LR"
+ranksep="1, equally"
+style="invis"
+node [shape="plain"]
+language [label="Language"]
+dravidian [label="Dravidian"]
+language -- dravidian
+subgraph {
+kn [label="Kannada"]
+dravidian -- kn
+te [label="Telugu"]
+dravidian -- te
+tamil_kota [label="Tamil-Kota"]
+dravidian -- tamil_kota
+subgraph {
+ml [label="Malayalam"]
+tamil_kota -- ml
+ta [label="Tamil"]
+tamil_kota -- ta
+}
+}
+indo_european [label="Indo-European"]
+language -- indo_european
+subgraph {
+indo_aryan [label="Indo-Aryan"]
+indo_european -- indo_aryan
+subgraph {
+bn [label="Bengali"]
+indo_aryan -- bn
+gu [label="Gujarati"]
+indo_aryan -- gu
+hi [label="Hindi"]
+indo_aryan -- hi
+mr [label="Marathi"]
+indo_aryan -- mr
+pa [label="Panjabi"]
+indo_aryan -- pa
+}
+germanic [label="Germanic"]
+indo_european -- germanic
+subgraph {
+en [label="English"]
+germanic -- en
+}
+}
+mixed_family [label="Mixed Family Tags"]
+language -- mixed_family
+subgraph {
+mul [label="Multiple Languages"]
+mixed_family -- mul
+und [label="Undetermined"]
+mixed_family -- und
+x_uni [label="Unified Multilingual"]
+mixed_family -- x_uni
+x_psa [label="Pan South Asian"]
+mixed_family -- x_psa
+}
+}
+```
diff --git a/nisaba/scripts/natural_translit/phonology/features/language.py b/nisaba/scripts/natural_translit/phonology/features/language.py
new file mode 100644
index 00000000..ec820255
--- /dev/null
+++ b/nisaba/scripts/natural_translit/phonology/features/language.py
@@ -0,0 +1,128 @@
+# Copyright 2024 Nisaba Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Language feature for PhonologicalSymbol."""
+
+from __future__ import annotations
+
+from typing import Union
+
+import pycountry
+
+from nisaba.scripts.natural_translit.utils import feature as ft
+from nisaba.scripts.natural_translit.utils import type_op as ty
+
+
+class Language(ft.Feature):
+  """Language feature for Phon."""
+
+  OR_NOTHING = Union['Language', ty.Nothing]
+
+  def __init__(self, alias: str, text: str, index: int):
+    super().__init__(alias, text)
+    self.index = index
+
+  @classmethod
+  def from_iso(cls, iso_code: str) -> Language:
+    """Makes a Language from a 2 or 3 letter ISO 639 code, else ValueError."""
+    code = iso_code.lower()
+    if len(code) == 2:
+      language = pycountry.languages.get(alpha_2=code)
+    elif len(code) == 3:
+      language = pycountry.languages.get(alpha_3=code)
+    else:
+      language = None
+    if language is None:  # Wrong length, or a code pycountry doesn't know.
+      raise ValueError(f'Invalid ISO code: {iso_code}')
+    return cls(code, language.name, list(pycountry.languages).index(language))
+
+  class Inventory(ft.Feature.Inventory):
+    """Language feature inventory."""
+
+    def __str__(self) -> str:
+      """Language family tree as markdown dot code block."""
+      # Currently all languages are equidistant and have 1 distance from each
+      # other so there is no need for distance tables. If the subgroup steps
+      # are changed to reflect similarity of related languages, the distance
+      # tables should be appended below.
+      return self.language.visualize()
+
+
+def _features() -> Language.Inventory:
+  """Language feature inventory.
+
+  The language inventory is organized by language families. The depth and the
+  granularity of the family hierarchy is pragmatic and can be updated as needed.
+
+  Returns:
+    An inventory of languages.
+  """
+  f = ft.Feature
+  features = Language.Inventory(
+      'languages',
+      f.Aspect(
+          f.equidistant(
+              'language',
+              f.equidistant(
+                  'dravidian',
+                  Language.from_iso('kn'),
+                  Language.from_iso('te'),
+                  f.equidistant(
+                      ('tamil_kota', 'Tamil-Kota'),
+                      Language.from_iso('ml'),
+                      Language.from_iso('ta'),
+                  ),
+              ),
+              f.equidistant(
+                  ('indo_european', 'Indo-European'),
+                  f.equidistant(
+                      ('indo_aryan', 'Indo-Aryan'),
+                      Language.from_iso('bn'),
+                      Language.from_iso('gu'),
+                      Language.from_iso('hi'),
+                      Language.from_iso('mr'),
+                      Language.from_iso('pa'),
+                  ),
+                  f.equidistant(
+                      'germanic',
+                      Language.from_iso('en'),
+                  ),
+              ),
+              f.equidistant(
+                  ('mixed_family', 'Mixed Family Tags'),
+                  # ISO 'mul' is a generic code for mixed-language datasets.
+                  Language.from_iso('mul'),
+                  Language.from_iso('und'),
+                  # x_uni is a custom code for the unified phoneme inventory
+                  # that will cover all IPA symbols and PHOIBLE segments. Index
+                  # 8000 is reserved for the unified inventory. This will
+                  # provide unique indices for all phonemes as long as the
+                  # number of pycountry languages is less than 8000 (currently
+                  # 7874). If the number of pycountry languages exceeds 8000, we
+                  # can change ReservedIndex for phoneme to 30_000_000 and
+                  # LANG_PREFIX_MULTIPLIER to 1_000 to accommodate.
+                  Language('x_uni', 'Unified Multilingual', 8_000),
+                  # x_uni will potentially cover 2k to 6k segments, resulting in
+                  # symbol indices from 3_800_001 up to 3_806_xxx. In order to
+                  # avoid symbol index collision, we start other custom language
+                  # indices from 9000.
+                  Language('x_psa', 'Pan South Asian', 9_001),
+              ),
+          ),
+      ),
+  )
+  return features
+
+
+FEATURES = _features()
diff --git a/nisaba/scripts/natural_translit/phonology/features/language_test.py b/nisaba/scripts/natural_translit/phonology/features/language_test.py
new file mode 100644
index 00000000..cd12f3d9
--- /dev/null
+++ b/nisaba/scripts/natural_translit/phonology/features/language_test.py
@@ -0,0 +1,33 @@
+# Copyright 2024 Nisaba Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from absl.testing import absltest
+from nisaba.scripts.natural_translit.phonology.features import language
+from nisaba.scripts.natural_translit.utils import test_op
+
+_LANG = language.FEATURES.language
+
+
+class LanguageTest(test_op.TestCase):
+
+  def test_language(self):
+    self.assertEqual(_LANG.bn.index, 624)
+    self.assertEqual(_LANG.x_uni.index, 8_000)
+
+  def test_no_duplicate_indices(self):
+    index_list = [l.index for l in _LANG]
+    self.assertEqual(len(set(index_list)), len(index_list))
+
+if __name__ == '__main__':
+  absltest.main()
diff --git a/nisaba/scripts/natural_translit/phonology/inventories/BUILD.bazel b/nisaba/scripts/natural_translit/phonology/inventories/BUILD.bazel
index 3659c4e8..d0cf07c2 100644
--- a/nisaba/scripts/natural_translit/phonology/inventories/BUILD.bazel
+++ b/nisaba/scripts/natural_translit/phonology/inventories/BUILD.bazel
@@ -19,19 +19,19 @@ package(
 )
 
 py_library(
-    name = "x_mul",
-    srcs = ["x_mul.py"],
+    name = "x_uni",
+    srcs = ["x_uni.py"],
     deps = [
         "//nisaba/scripts/natural_translit/phonology:phonological_symbol",
     ],
 )
 
 py_test(
-    name = "x_mul_test",
-    srcs = ["x_mul_test.py"],
-    main = "x_mul_test.py",
+    name = "x_uni_test",
+    srcs = ["x_uni_test.py"],
+    main = "x_uni_test.py",
     deps = [
-        ":x_mul",
+        ":x_uni",
         "//nisaba/scripts/natural_translit/utils:test_op",
         "@io_abseil_py//absl/testing:absltest",
     ],
diff --git a/nisaba/scripts/natural_translit/phonology/inventories/docs/x_mul.md b/nisaba/scripts/natural_translit/phonology/inventories/docs/x_uni.md
similarity index 99%
rename from nisaba/scripts/natural_translit/phonology/inventories/docs/x_mul.md
rename to nisaba/scripts/natural_translit/phonology/inventories/docs/x_uni.md
index 189c3df5..9e73411a 100644
--- a/nisaba/scripts/natural_translit/phonology/inventories/docs/x_mul.md
+++ b/nisaba/scripts/natural_translit/phonology/inventories/docs/x_uni.md
@@ -6,7 +6,7 @@ Feature tables for the phonological symbols in multilingual 'x_mul' inventory.
 
 <!-- AUTO-GENERATED INVENTORY STRING STARTS HERE -->
 
-## Inventory: x_mul
+## Inventory: x_uni
 
 ### alias: a	ipa: a	name: open front unrounded vowel
   a features:
diff --git a/nisaba/scripts/natural_translit/phonology/inventories/x_mul.py b/nisaba/scripts/natural_translit/phonology/inventories/x_uni.py
similarity index 100%
rename from nisaba/scripts/natural_translit/phonology/inventories/x_mul.py
rename to nisaba/scripts/natural_translit/phonology/inventories/x_uni.py
diff --git a/nisaba/scripts/natural_translit/phonology/inventories/x_mul_test.py b/nisaba/scripts/natural_translit/phonology/inventories/x_uni_test.py
similarity index 99%
rename from nisaba/scripts/natural_translit/phonology/inventories/x_mul_test.py
rename to nisaba/scripts/natural_translit/phonology/inventories/x_uni_test.py
index 1296c8ea..f0cd0e9c 100644
--- a/nisaba/scripts/natural_translit/phonology/inventories/x_mul_test.py
+++ b/nisaba/scripts/natural_translit/phonology/inventories/x_uni_test.py
@@ -13,7 +13,7 @@
 # limitations under the License.
 
 from absl.testing import absltest
-from nisaba.scripts.natural_translit.phonology.inventories import x_mul
+from nisaba.scripts.natural_translit.phonology.inventories import x_uni
 from nisaba.scripts.natural_translit.utils import test_op
 
 
diff --git a/nisaba/scripts/natural_translit/phonology/phonological_symbol.py b/nisaba/scripts/natural_translit/phonology/phonological_symbol.py
index 3df3186f..69c4aeca 100644
--- a/nisaba/scripts/natural_translit/phonology/phonological_symbol.py
+++ b/nisaba/scripts/natural_translit/phonology/phonological_symbol.py
@@ -17,6 +17,7 @@
 from __future__ import annotations
 
 from nisaba.scripts.natural_translit.phonology.features import descriptive
+from nisaba.scripts.natural_translit.phonology.features import language as lang
 from nisaba.scripts.natural_translit.utils import expression as exp
 from nisaba.scripts.natural_translit.utils import feature as ft
 from nisaba.scripts.natural_translit.utils import inventory as i
@@ -28,6 +29,7 @@ class PhonologicalSymbol(sym.Symbol):
   """Parent class for symbols with phonological features."""
 
   DESCRIPTIVE_FEATURES = descriptive.FEATURES
+  LANGUAGE = lang.FEATURES.language
 
   def __init__(
       self,
@@ -39,6 +41,7 @@ def __init__(
   ):
     super().__init__(alias, raw=raw, index=index, name=name)
     self.text = raw if raw else self.alias
+    self.language = self.LANGUAGE.x_uni  # Default Unified Multilingual.
     self.features.new_profile(
         ft.Feature.Profile(self.DESCRIPTIVE_FEATURES, 'new')
     )
@@ -60,6 +63,9 @@ def update_descriptives_from_symbol(
     self.update_descriptives(*(s.descriptives() for s in symbols))
     return self
 
+  def has_feature(self, value: ft.Feature.Aspect.VALUES) -> bool:
+    return value.is_in(ft.Feature.Set(self.features, self.language))
+
   class Inventory(sym.Symbol.Inventory):
     """Phonological symbol inventory."""
 
@@ -113,13 +119,11 @@ def sync_atomics(
 class Phon(PhonologicalSymbol):
   """Class for representing phonemes, phones, and phonological modifiers."""
 
-  # TODO(): Add pycountry languages as a feature similar to scripts.
-  # Phone index will be determined by a combination of the reserved prefix for
-  # phonemes, the index of the country code, and the index of the phon in
-  # the language inventory.
+  LANG_PREFIX_MULTIPLIER = 100
+
   def __init__(
       self,
-      language: str = '',
+      language: lang.Language.OR_NOTHING = ty.UNSPECIFIED,
       alias: str = '',
       ipa: str = '',
       index: ty.IntOrNothing = ty.UNSPECIFIED,
@@ -131,7 +135,7 @@ def __init__(
         index=index,
         name=name,
     )
-    self.language = language if language else 'x_mul'  # Custom multilingual.
+    self.language = ty.type_check(language, self.LANGUAGE.x_uni)
     self.ipa = ipa
     self.add_features(features)
 
@@ -142,10 +146,15 @@ def description(self, show_features: bool = False) -> str:
       text += f'\n  {self.features}'
     return text
 
-  def copy(self, language: str = '', alias: str = '', ipa: str = '') -> Phon:
+  def copy(
+      self,
+      language: lang.Language.OR_NOTHING = ty.UNSPECIFIED,
+      alias: str = '',
+      ipa: str = '',
+  ) -> Phon:
     """Creates a copy of the Phon."""
     return Phon(
-        language=language if language else self.language,
+        language=ty.type_check(language, self.language),
         alias=alias if alias else self.alias,
         ipa=ipa if ipa else self.ipa,
         index=self.index,
@@ -156,14 +165,20 @@ def copy(self, language: str = '', alias: str = '', ipa: str = '') -> Phon:
   class Inventory(PhonologicalSymbol.Inventory):
     """Phon inventory."""
 
-    def __init__(self, language: str = ''):
-      language = language if language else 'x_mul'  # Custom multilingual.
-      super().__init__(alias=language, typed=Phon)
+    def __init__(self, language: lang.Language.OR_NOTHING = ty.UNSPECIFIED):
+      language = ty.type_check(language, Phon.LANGUAGE.x_uni)
+      super().__init__(language.alias, typed=Phon)
       self.language = language
 
     def _add_phoneme(self, phoneme: Phon) -> bool:
       """Adds a phoneme to the inventory."""
-      phoneme.index = Phon.ReservedIndex.PHONEME_PREFIX + len(self) + 1
+      phoneme.language = self.language
+      phoneme.index = (
+          Phon.ReservedIndex.PHONEME_PREFIX
+          + int(self.language.index) * Phon.LANG_PREFIX_MULTIPLIER
+          + len(self)
+          + 1
+      )
       return self._add_symbol_and_atomic(phoneme)
 
     def add_phonemes(self, *phonemes: Phon, list_alias: str = '') -> list[Phon]:
diff --git a/nisaba/scripts/natural_translit/phonology/phonological_symbol_test.py b/nisaba/scripts/natural_translit/phonology/phonological_symbol_test.py
index 29a52be6..ccc5d6c0 100644
--- a/nisaba/scripts/natural_translit/phonology/phonological_symbol_test.py
+++ b/nisaba/scripts/natural_translit/phonology/phonological_symbol_test.py
@@ -16,11 +16,13 @@
 from nisaba.scripts.natural_translit.phonology import phonological_symbol as po
 from nisaba.scripts.natural_translit.utils import test_op
 
+_LANG = po.Phon.LANGUAGE
+
 
 def _test_inventory() -> po.Phon.Inventory:
   """Multilingual Phon inventory."""
   phf = po.Phon.DESCRIPTIVE_FEATURES
-  ph_inv = po.Phon.Inventory()
+  ph_inv = po.Phon.Inventory(_LANG.en)
   vowels = [
       ('a', 'a', 'open front unrounded vowel'),
       ('e', 'e', 'close_mid front unrounded vowel'),
@@ -55,9 +57,27 @@ def _test_inventory() -> po.Phon.Inventory:
 class PhonologicalSymbolTest(test_op.TestCase):
 
   def test_phon_inventory(self):
-    self.assertEqual(_TEST.alias, 'x_mul')
+    self.assertEqual(_TEST.alias, 'en')
+    self.assertEqual(_TEST.language, _LANG.en)
+    self.assertEqual(_TEST.a.language, _LANG.en)
+    self.assertEqual(
+        _TEST.a.index,
+        po.Phon.ReservedIndex.PHONEME_PREFIX
+        + _LANG.en.index * po.Phon.LANG_PREFIX_MULTIPLIER
+        + 1,
+    )
     self.assertIn(_TEST.a, _TEST.vowel)
 
+  def test_has_feature(self):
+    self.AssertHasFeature(_TEST.a, _LANG.en)
+    self.AssertHasFeature(_TEST.a, _LANG.germanic)
+    self.AssertHasFeature(_TEST.a, _LANG.indo_european)
+    self.AssertNotHasFeature(_TEST.a, _LANG.indo_aryan)
+    self.AssertNotHasFeature(_TEST.a, _LANG.x_uni)
+    self.AssertHasFeature(
+        _TEST.i, po.Phon.DESCRIPTIVE_FEATURES.ph_class.vowel
+    )
+
   def test_phon_description(self):
     self.assertEqual(
         _TEST.a.description(show_features=True),
@@ -86,11 +106,6 @@ def test_phon_description(self):
         '| syllabicity      | syllabic       |\n',
     )
 
-  def test_has_feature(self):
-    self.AssertHasFeature(
-        _TEST.i, po.Phon.DESCRIPTIVE_FEATURES.ph_class.vowel
-    )
-
   def test_phon_copy(self):
     self.assertEqual(
         _TEST.a.copy(alias='a_copy').description(show_features=True),
diff --git a/nisaba/scripts/natural_translit/utils/feature.py b/nisaba/scripts/natural_translit/utils/feature.py
index df6739d0..5ecbba03 100644
--- a/nisaba/scripts/natural_translit/utils/feature.py
+++ b/nisaba/scripts/natural_translit/utils/feature.py
@@ -383,7 +383,7 @@ def __init__(
         step: float,
         *features: Feature.Aspect.VALUES,
     ):
-      alias, text = alias if isinstance(alias, tuple) else alias, alias
+      alias, text = ty.type_check(alias, (alias, alias))
       super().__init__(alias=alias)
       self.text = text
       self._item_type = Union[Feature, Feature.ValueList]
@@ -503,6 +503,21 @@ def is_in(self, obj: ...) -> bool:
       """Checks if this list is contained within the given object."""
       return value_in(self, obj)
 
+    def visualize(self) -> str:
+      """Returns graphviz subgraph for the ValueList."""
+      graph = ''
+      for item in self:
+        graph += (
+            f'{item.alias} [label="{item.text.title()}"]\n'
+            f'{self.alias} -- {item.alias}\n'
+        )
+        if isinstance(item, Feature.ValueList):
+          graph += (
+              f'subgraph {_L_BRACE}\n'
+              f'{item.visualize()}{_R_BRACE}\n'
+          )
+      return graph
+
   class Aspect(inventory.Inventory):
     """An aspect that can be defined by a list of contrastive values.
 
@@ -629,6 +644,17 @@ def is_applicable(self, profile: Feature.Profile) -> bool:
           and self.n_a not in profile.get(self)
       )
 
+    def visualize(self) -> str:
+      """Returns markdown dot code block for the Aspect."""
+      r = self.root_list
+      return (
+          f'```dot\ngraph {_L_BRACE}\n'
+          'ordering="out"\nsize = 12\nrankdir="LR"\nranksep="1, equally"\n'
+          'style="invis"\nnode [shape="plain"]\n'
+          f'{r.alias} [label="{r.text.capitalize()}"]\n{r.visualize()}'
+          f'{_R_BRACE}\n```'
+      )
+
   class Inventory(inventory.Inventory):
     """An inventory of Aspects and their contrastive features.