From 13d2e8d06a72aa65a27c9427b49893410bb33657 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=81lvaro=20P=C3=A9rez?= <alvaro.perez.pozo@gmail.com>
Date: Thu, 5 Dec 2019 13:15:10 +0100
Subject: [PATCH 1/6] Fix/scansion (#62)

* Fixed syllabification exceptions, support for disabling/enabling spacy_affixes

* Fixed multiline break

* Fixed splitted verb stresses and secondary stress on '-mente' adverbs

* Fixed reviewed issues

* Fixed reviewed issues 2nd wave

* Added minimum length for '-mente' adverbs
---
 src/rantanplan/core.py                    | 156 ++++++++++++++-----
 src/rantanplan/pipeline.py                |  14 +-
 tests/fixtures/phonological_groups.json   | 180 +++++++++++-----------
 tests/fixtures/rhyme_analysis_sonnet.json |  30 +---
 tests/test_pipeline.py                    |  18 +++
 5 files changed, 242 insertions(+), 156 deletions(-)

diff --git a/src/rantanplan/core.py b/src/rantanplan/core.py
index 1f3a659..e1c9cd2 100644
--- a/src/rantanplan/core.py
+++ b/src/rantanplan/core.py
@@ -71,18 +71,21 @@
 WEAK_VOWELS = set("iuüíúIÍUÜÚ")
 LIAISON_FIRST_PART = set("aeiouáéíóúAEIOUÁÉÍÓÚyY")
 LIAISON_SECOND_PART = set("aeiouáéíóúAEIOUÁÉÍÓÚhyYH")
+
 STRESSED_UNACCENTED_MONOSYLLABLES = {"yo", "vio", "dio", "fe", "sol", "ti",
                                      "un"}
+
 UNSTRESSED_UNACCENTED_MONOSYLLABLES = {'de', 'el', 'la', 'las', 'le', 'les',
                                        'lo', 'los',
                                        'mas', 'me', 'mi', 'nos', 'os', 'que',
                                        'se', 'si',
                                        'su', 'tan', 'te', 'tu', "tus", "oh"}
-UNSTRESSED_FORMS = {"que", "cual", "quien", "donde", "cuando", "cuanto",
-                    "como"}
 
-POSSESSIVE_PRON = {"mío", "mía", "míos", "mías", "tuyo", "tuya", "tuyos",
-                   "tuyas", "suyo", "suya", "suyos", "suyas"}
+UNSTRESSED_FORMS = {"ay", "don", "doña", "aun", "que", "cual", "quien", "donde",
+                    "cuando", "cuanto", "como", "cuantas", "cuantos"}
+
+STRESSED_PRON = {"mío", "mía", "míos", "mías", "tuyo", "tuya", "tuyos",
+                 "tuyas", "suyo", "suya", "suyos", "suyas", "todo"}
 
 POSSESSIVE_PRON_UNSTRESSED = {"nuestro", "nuestra", "nuestros", "nuestras",
                               "vuestro", "vuestra", "vuestros", "vuestras"}
@@ -282,14 +285,29 @@ def get_stresses(phonological_groups):
     :return: List of boolean values indicating whether a group is
     stressed (True) or not (False)
     """
-    stresses = [group["is_stressed"] for group in phonological_groups]
+    # stresses = [group["is_stressed"] for group in phonological_groups]
+    stresses = []
+    last_word_syllables = []
+    for group in phonological_groups:
+        stresses.append(group["is_stressed"])
+    for group in phonological_groups:
+        last_word_syllables.append(group.get("is_word_end", False))
+    # Get position for the last syllable of the penultimate word
+    if last_word_syllables.count(True) > 1:
+        penultimate_word = -(
+            [i for i, n in enumerate(last_word_syllables[::-1]) if n][1] + 1)
+    else:
+        penultimate_word = None
     last_stress = -(stresses[::-1].index(True) + 1)
     # Oxytone (Aguda)
     if last_stress == -1:
         stresses.append(False)
     # Paroxytone (Esdrújula) or Proparoxytone (Sobreesdrújula)
     elif last_stress <= -3:
-        stresses.pop()
+        if penultimate_word is None:
+            stresses.pop()
+        elif last_stress > penultimate_word:
+            stresses.pop()
     return stresses
 
 
@@ -469,7 +487,7 @@ def get_word_stress(word, pos, tag, alternative_syllabification=False):
     """
     Gets a list of syllables from a word and creates a list with syllabified
     word and stressed syllable index
-    :param word: List of str representing syllables
+    :param word: Word string
     :param alternative_syllabification: Wether or not the alternative
     syllabification is used
     :param pos: PoS tag from spacy ("DET")
@@ -481,7 +499,19 @@ def get_word_stress(word, pos, tag, alternative_syllabification=False):
     :rtype: dict
     """
     syllable_list, _ = syllabify(word, alternative_syllabification)
-    word_lower = "".join(word).lower()
+    word_lower = word.lower()
+    # Handle secondary stress on adverbs ending in -mente
+    if pos == "ADV" and word_lower[-5:] == "mente" and len(word) > 5:
+        root = word[:-5]
+        mente = word[-5:]
+        stress_root = get_word_stress(root, "ADJ", "")
+        stress_mente = get_word_stress(mente, "NOUN", "")
+        return {
+            'word': stress_root['word'] + stress_mente['word'],
+            "stress_position": stress_root['stress_position'] - len(
+                stress_mente['word']),
+            "secondary_stress_positions": [stress_mente['stress_position']],
+        }
     if len(syllable_list) == 1:
         first_monosyllable = syllable_list[0].lower()
         if ((first_monosyllable not in UNSTRESSED_UNACCENTED_MONOSYLLABLES)
@@ -489,7 +519,7 @@ def get_word_stress(word, pos, tag, alternative_syllabification=False):
                      or pos not in ("SCONJ", "CCONJ", "DET", "PRON", "ADP")
                      or (pos == "PRON" and tag.get("Case") == "Nom")
                      or (pos == "DET" and tag.get("Definite") in (
-                         "Dem", "Ind"))
+                                "Dem", "Ind"))
                      or pos in ("PROPN", "NUM", "NOUN", "VERB", "AUX", "ADV")
                      or (pos == "ADJ" and tag.get("Poss", None) != "Yes")
                      or (pos == "PRON"
@@ -500,37 +530,50 @@ def get_word_stress(word, pos, tag, alternative_syllabification=False):
                      or (pos in ("PRON", "DET")
                          and tag.get("PronType", None) in (
                                  "Exc", "Int", "Dem"))
-                     or "".join(word).lower() in POSSESSIVE_PRON)):
+                     or "".join(word).lower() in STRESSED_PRON) and (
+                        word_lower not in UNSTRESSED_FORMS)):
             stressed_position = -1
         else:
             stressed_position = 0  # unstressed monosyllable
-    elif (pos in ("INTJ", "PROPN", "NUM", "NOUN", "VERB", "AUX", "ADV")
-          or pos == "ADJ" and word_lower not in POSSESSIVE_PRON_UNSTRESSED
-          or (pos == "PRON" and tag.get("PronType", None) in ("Prs", "Ind"))
-          or (pos == "DET" and tag.get("PronType", None) in ("Dem", "Ind"))
-          or (pos == "DET" and tag.get("Definite", None) == "Ind")
-          or (pos == "PRON" and tag.get("Poss", None) == "Yes")
-          or (pos in ("PRON", "DET")
-              and tag.get("PronType", None) in ("Exc", "Int", "Dem"))
-          or (word_lower in POSSESSIVE_PRON)):
+    else:
         tilde = get_orthographic_accent(syllable_list)
-        # If an orthographic accent exists, the syllable negative index is saved
         if tilde is not None:
-            stressed_position = -(len(syllable_list) - tilde)
-        # Elif the word is paroxytone (llana) we save the penultimate syllable.
-        elif is_paroxytone(syllable_list):
-            stressed_position = -2
-        # If the word does not meet the above criteria that means that it's an
-        # oxytone word (aguda).
+            stressed_position = tilde - len(syllable_list)
+        elif (pos in ("INTJ", "PROPN", "NUM", "NOUN", "VERB", "AUX", "ADV")
+              or pos == "ADJ"
+              or (pos == "PRON" and tag.get("PronType", None) in (
+                        "Prs", "Ind"))
+              or (pos == "DET" and tag.get("PronType", None) in (
+                        "Dem", "Ind"))
+              or (pos == "DET" and tag.get("Definite", None) == "Ind")
+              or (pos == "PRON" and tag.get("Poss", None) == "Yes")
+              or (pos in ("PRON", "DET")
+                  and tag.get("PronType", None) in ("Exc", "Int", "Dem"))
+              or (word_lower in STRESSED_PRON)) and (
+                word_lower not in UNSTRESSED_FORMS) and (
+                word_lower not in POSSESSIVE_PRON_UNSTRESSED):
+            tilde = get_orthographic_accent(syllable_list)
+            # If an orthographic accent exists,
+            # the syllable negative index is saved
+            if tilde is not None:
+                stressed_position = -(len(syllable_list) - tilde)
+            # Elif the word is paroxytone (llana)
+            # we save the penultimate syllable.
+            elif is_paroxytone(syllable_list):
+                stressed_position = -2
+            # If the word does not meet the above criteria that means
+            # that it's an oxytone word (aguda).
+            else:
+                stressed_position = -1
         else:
-            stressed_position = -1
-    else:
-        stressed_position = 0  # unstressed
+            stressed_position = 0  # unstressed
     out_syllable_list = []
     for index, syllable in enumerate(syllable_list):
         out_syllable_list.append(
-            {"syllable": syllable,
-             "is_stressed": len(syllable_list) - index == -stressed_position})
+            {
+                "syllable": syllable,
+                "is_stressed": len(syllable_list) - index == -stressed_position
+            })
         if index < 1:
             continue
         # Sinaeresis
@@ -583,18 +626,53 @@ def get_words(word_list, alternative_syllabification=False):
             tags = spacy_tag_to_dict(tag)
             stressed_word = get_word_stress(word.text, pos, tags,
                                             alternative_syllabification)
-            first_syllable = get_last_syllable(syllabified_words)
-            second_syllable = stressed_word['word'][0]
-            # Synalepha
-            if first_syllable and second_syllable and have_prosodic_liaison(
-                    first_syllable, second_syllable):
-                first_syllable.update({'has_synalepha': True})
+            if word.pos_ in ("AUX", "VERB") and word._.affixes_length:
+                stressed_word.update(
+                    {'affixes_length': word._.affixes_length})
+                stressed_word.update({'pos': word.pos_, 'tag': word.tag_})
             syllabified_words.append(stressed_word)
         else:
             syllabified_words.append({"symbol": word.text})
+    syllabified_words = join_affixes(syllabified_words)
+    clean_word_list = [syll for syll in syllabified_words if "word" in syll]
+    # Synalepha
+    for index, word in enumerate(clean_word_list):
+        if len(clean_word_list) != index + 1:
+            first_syllable = clean_word_list[index]['word'][-1]
+            second_syllable = clean_word_list[index + 1]['word'][0]
+            if first_syllable and second_syllable and have_prosodic_liaison(
+                    first_syllable, second_syllable):
+                first_syllable.update({'has_synalepha': True})
     return syllabified_words
 
 
+def join_affixes(line):
+    """
+    Merge each split word with its affixes and recompute its stress
+    :param line: List of word dicts; 'affixes_length' marks a split word
+    :return: New list where each split word and its affixes are one entry
+    """
+    syllabified_words = []
+    indices_to_ignore = []
+    for index, word in enumerate(line):
+        affixes_length = word.get('affixes_length', None)  # None: keep as is
+        if index in indices_to_ignore:  # consumed by a previous merge
+            continue
+        elif affixes_length is None:
+            syllabified_words.append(word)
+        else:
+            indices_to_ignore = range(index, index + affixes_length + 1)
+            join_word = []  # syllables of the word and its affixes
+            for affix_index in indices_to_ignore:
+                affix = line[affix_index]['word']
+                join_word += [syll["syllable"] for syll in affix]
+            word_stress = get_word_stress("".join(join_word), word["pos"],
+                                          word["tag"])
+            word_stress["word"][-1]["is_word_end"] = True  # close merged word
+            syllabified_words.append(word_stress)
+    return syllabified_words if syllabified_words else line
+
+
 def get_scansion(text, rhyme_analysis=False, rhythm_format="pattern",
                  rhythmical_lengths=None):
     """
@@ -692,9 +770,9 @@ def generate_phonological_groups(tokens):
         syllables = get_syllables_word_end(words)
         for liaison in (
                 ("synalepha",),
+                ("synalepha", "sinaeresis"),
                 ("sinaeresis",),
                 ("sinaeresis", "synalepha"),
-                ("synalepha", "sinaeresis"),
         ):
             for ignore_synalepha_h in (break_on_h, None):
                 for liaison_positions_1 in generate_liaison_positions(
@@ -710,7 +788,7 @@ def generate_phonological_groups(tokens):
                         yield groups
                     else:
                         for liaison_positions_2 in generate_liaison_positions(
-                            syllables, liaison[1]
+                                syllables, liaison[1]
                         ):
                             yield get_phonological_groups(
                                 groups,
diff --git a/src/rantanplan/pipeline.py b/src/rantanplan/pipeline.py
index de5602b..2e659f1 100644
--- a/src/rantanplan/pipeline.py
+++ b/src/rantanplan/pipeline.py
@@ -29,10 +29,11 @@ def custom_tokenizer(nlp):
 _load_pipeline = {}
 
 
-def load_pipeline(lang=None):
+def load_pipeline(lang=None, split_affixes=True):
     """
     Loads the new pipeline with the custom tokenizer
     :param lang: Spacy language model
+    :param split_affixes: Whether or not to use spacy_affixes to split words
     :return: New custom language model
     """
     global _load_pipeline
@@ -41,9 +42,12 @@ def load_pipeline(lang=None):
     if lang not in _load_pipeline:
         nlp = spacy.load(lang)
         nlp.tokenizer = custom_tokenizer(nlp)
-        nlp.remove_pipe("affixes") if nlp.has_pipe("affixes") else None
-        suffixes = {k: v for k, v in load_affixes().items() if k.startswith(AFFIXES_SUFFIX)}
-        affixes_matcher = AffixesMatcher(nlp, split_on=["VERB"], rules=suffixes)
-        nlp.add_pipe(affixes_matcher, name="affixes", first=True)
+        if split_affixes:
+            nlp.remove_pipe("affixes") if nlp.has_pipe("affixes") else None
+            suffixes = {k: v for k, v in load_affixes().items() if
+                        k.startswith(AFFIXES_SUFFIX)}
+            affixes_matcher = AffixesMatcher(nlp, split_on=["VERB"],
+                                             rules=suffixes)
+            nlp.add_pipe(affixes_matcher, name="affixes", first=True)
         _load_pipeline[lang] = nlp
     return _load_pipeline[lang]
diff --git a/tests/fixtures/phonological_groups.json b/tests/fixtures/phonological_groups.json
index 32ad5e6..dda6e55 100644
--- a/tests/fixtures/phonological_groups.json
+++ b/tests/fixtures/phonological_groups.json
@@ -282,7 +282,7 @@
     {
       "syllable": "rro",
       "is_stressed": false,
-      "has_synalepha": true,
+      "has_synalepha": false,
       "is_word_end": true
     },
     {
@@ -290,14 +290,11 @@
       "is_stressed": true
     },
     {
-      "syllable": "ce",
-      "is_stressed": false,
-      "has_synalepha": true,
-      "is_word_end": true
-    },
-    {
-      "syllable": "a",
-      "is_stressed": true
+      "syllable": "cea",
+      "is_stressed": true,
+      "synalepha_index": [
+        1
+      ]
     },
     {
       "syllable": "guas",
@@ -318,7 +315,7 @@
     {
       "syllable": "rro",
       "is_stressed": false,
-      "has_synalepha": true,
+      "has_synalepha": false,
       "is_word_end": true
     },
     {
@@ -328,7 +325,7 @@
     {
       "syllable": "ce",
       "is_stressed": false,
-      "has_synalepha": true,
+      "has_synalepha": false,
       "is_word_end": true
     },
     {
@@ -421,14 +418,11 @@
       "is_stressed": true
     },
     {
-      "syllable": "rro",
-      "is_stressed": false,
-      "has_synalepha": false,
-      "is_word_end": true
-    },
-    {
-      "syllable": "ha",
-      "is_stressed": true
+      "syllable": "rroha",
+      "is_stressed": true,
+      "synalepha_index": [
+        2
+      ]
     },
     {
       "syllable": "cea",
@@ -454,14 +448,11 @@
       "is_stressed": true
     },
     {
-      "syllable": "rro",
-      "is_stressed": false,
-      "has_synalepha": false,
-      "is_word_end": true
-    },
-    {
-      "syllable": "ha",
-      "is_stressed": true
+      "syllable": "rroha",
+      "is_stressed": true,
+      "synalepha_index": [
+        2
+      ]
     },
     {
       "syllable": "ce",
@@ -490,11 +481,14 @@
       "is_stressed": true
     },
     {
-      "syllable": "rroha",
-      "is_stressed": true,
-      "synalepha_index": [
-        2
-      ]
+      "syllable": "rro",
+      "is_stressed": false,
+      "has_synalepha": false,
+      "is_word_end": true
+    },
+    {
+      "syllable": "ha",
+      "is_stressed": true
     },
     {
       "syllable": "cea",
@@ -520,11 +514,14 @@
       "is_stressed": true
     },
     {
-      "syllable": "rroha",
-      "is_stressed": true,
-      "synalepha_index": [
-        2
-      ]
+      "syllable": "rro",
+      "is_stressed": false,
+      "has_synalepha": false,
+      "is_word_end": true
+    },
+    {
+      "syllable": "ha",
+      "is_stressed": true
     },
     {
       "syllable": "ce",
@@ -555,7 +552,7 @@
     {
       "syllable": "rro",
       "is_stressed": false,
-      "has_synalepha": false,
+      "has_synalepha": true,
       "is_word_end": true
     },
     {
@@ -563,11 +560,14 @@
       "is_stressed": true
     },
     {
-      "syllable": "cea",
-      "is_stressed": true,
-      "synalepha_index": [
-        1
-      ]
+      "syllable": "ce",
+      "is_stressed": false,
+      "has_synalepha": true,
+      "is_word_end": true
+    },
+    {
+      "syllable": "a",
+      "is_stressed": true
     },
     {
       "syllable": "guas",
@@ -588,7 +588,7 @@
     {
       "syllable": "rro",
       "is_stressed": false,
-      "has_synalepha": false,
+      "has_synalepha": true,
       "is_word_end": true
     },
     {
@@ -598,7 +598,7 @@
     {
       "syllable": "ce",
       "is_stressed": false,
-      "has_synalepha": false,
+      "has_synalepha": true,
       "is_word_end": true
     },
     {
@@ -1164,7 +1164,7 @@
     {
       "syllable": "rro",
       "is_stressed": false,
-      "has_synalepha": true,
+      "has_synalepha": false,
       "is_word_end": true
     },
     {
@@ -1172,14 +1172,11 @@
       "is_stressed": true
     },
     {
-      "syllable": "ce",
-      "is_stressed": false,
-      "has_synalepha": true,
-      "is_word_end": true
-    },
-    {
-      "syllable": "a",
-      "is_stressed": true
+      "syllable": "cea",
+      "is_stressed": true,
+      "synalepha_index": [
+        1
+      ]
     },
     {
       "syllable": "guas",
@@ -1200,7 +1197,7 @@
     {
       "syllable": "rro",
       "is_stressed": false,
-      "has_synalepha": true,
+      "has_synalepha": false,
       "is_word_end": true
     },
     {
@@ -1210,7 +1207,7 @@
     {
       "syllable": "ce",
       "is_stressed": false,
-      "has_synalepha": true,
+      "has_synalepha": false,
       "is_word_end": true
     },
     {
@@ -1303,14 +1300,11 @@
       "is_stressed": true
     },
     {
-      "syllable": "rro",
-      "is_stressed": false,
-      "has_synalepha": false,
-      "is_word_end": true
-    },
-    {
-      "syllable": "ha",
-      "is_stressed": true
+      "syllable": "rroha",
+      "is_stressed": true,
+      "synalepha_index": [
+        2
+      ]
     },
     {
       "syllable": "cea",
@@ -1336,14 +1330,11 @@
       "is_stressed": true
     },
     {
-      "syllable": "rro",
-      "is_stressed": false,
-      "has_synalepha": false,
-      "is_word_end": true
-    },
-    {
-      "syllable": "ha",
-      "is_stressed": true
+      "syllable": "rroha",
+      "is_stressed": true,
+      "synalepha_index": [
+        2
+      ]
     },
     {
       "syllable": "ce",
@@ -1372,11 +1363,14 @@
       "is_stressed": true
     },
     {
-      "syllable": "rroha",
-      "is_stressed": true,
-      "synalepha_index": [
-        2
-      ]
+      "syllable": "rro",
+      "is_stressed": false,
+      "has_synalepha": false,
+      "is_word_end": true
+    },
+    {
+      "syllable": "ha",
+      "is_stressed": true
     },
     {
       "syllable": "cea",
@@ -1402,11 +1396,14 @@
       "is_stressed": true
     },
     {
-      "syllable": "rroha",
-      "is_stressed": true,
-      "synalepha_index": [
-        2
-      ]
+      "syllable": "rro",
+      "is_stressed": false,
+      "has_synalepha": false,
+      "is_word_end": true
+    },
+    {
+      "syllable": "ha",
+      "is_stressed": true
     },
     {
       "syllable": "ce",
@@ -1437,7 +1434,7 @@
     {
       "syllable": "rro",
       "is_stressed": false,
-      "has_synalepha": false,
+      "has_synalepha": true,
       "is_word_end": true
     },
     {
@@ -1445,11 +1442,14 @@
       "is_stressed": true
     },
     {
-      "syllable": "cea",
-      "is_stressed": true,
-      "synalepha_index": [
-        1
-      ]
+      "syllable": "ce",
+      "is_stressed": false,
+      "has_synalepha": true,
+      "is_word_end": true
+    },
+    {
+      "syllable": "a",
+      "is_stressed": true
     },
     {
       "syllable": "guas",
@@ -1470,7 +1470,7 @@
     {
       "syllable": "rro",
       "is_stressed": false,
-      "has_synalepha": false,
+      "has_synalepha": true,
       "is_word_end": true
     },
     {
@@ -1480,7 +1480,7 @@
     {
       "syllable": "ce",
       "is_stressed": false,
-      "has_synalepha": false,
+      "has_synalepha": true,
       "is_word_end": true
     },
     {
diff --git a/tests/fixtures/rhyme_analysis_sonnet.json b/tests/fixtures/rhyme_analysis_sonnet.json
index f17b740..bfbb64a 100644
--- a/tests/fixtures/rhyme_analysis_sonnet.json
+++ b/tests/fixtures/rhyme_analysis_sonnet.json
@@ -1828,21 +1828,15 @@
           },
           {
             "syllable": "dir",
-            "is_stressed": true,
-            "is_word_end": true
-          }
-        ],
-        "stress_position": -1
-      },
-      {
-        "word": [
+            "is_stressed": true
+          },
           {
             "syllable": "me",
             "is_stressed": false,
             "is_word_end": true
           }
         ],
-        "stress_position": 0
+        "stress_position": -2
       }
     ],
     "phonological_groups": [
@@ -1891,8 +1885,7 @@
       },
       {
         "syllable": "dir",
-        "is_stressed": true,
-        "is_word_end": true
+        "is_stressed": true
       },
       {
         "syllable": "me",
@@ -2143,21 +2136,15 @@
           },
           {
             "syllable": "tir",
-            "is_stressed": true,
-            "is_word_end": true
-          }
-        ],
-        "stress_position": -1
-      },
-      {
-        "word": [
+            "is_stressed": true
+          },
           {
             "syllable": "me",
             "is_stressed": false,
             "is_word_end": true
           }
         ],
-        "stress_position": 0
+        "stress_position": -2
       },
       {
         "symbol": "."
@@ -2209,8 +2196,7 @@
       },
       {
         "syllable": "tir",
-        "is_stressed": true,
-        "is_word_end": true
+        "is_stressed": true
       },
       {
         "syllable": "me",
diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py
index bafab6a..c35092d 100644
--- a/tests/test_pipeline.py
+++ b/tests/test_pipeline.py
@@ -29,3 +29,21 @@ def mockreturn(lang=None):
             {"text": token.text, "pos_": token.pos_, "tag_": token.tag_,
             "n_rights": token.n_rights})  # noqa
     assert token_dict == test_dict_list
+
+
+def test_load_pipeline_affixes(monkeypatch):
+    def mockreturn(lang=None):
+        nlp = spacy.blank('es')  # noqa
+        nlp.vocab.lookups.get_table = lambda *_: {}  # stub: empty table
+        return nlp
+
+    monkeypatch.setattr(spacy, 'load', mockreturn)  # no real model loaded
+    # load_pipeline caches per lang: the key must be unused in this session
+    nlp = load_pipeline("blank", split_affixes=False)  # skip affixes pipe
+    doc = nlp("prue-\nba")  # word hyphenated across a line break
+    token_dict = []
+    for token in doc:
+        token_dict.append(
+            {"text": token.text, "pos_": token.pos_, "tag_": token.tag_,
+            "n_rights": token.n_rights})  # noqa
+    assert token_dict == test_dict_list

From a94af95b98d9af7a8707b9c82b53b93503c81252 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=81lvaro=20P=C3=A9rez?= <alvaro.perez.pozo@gmail.com>
Date: Thu, 5 Dec 2019 16:38:42 +0100
Subject: [PATCH 2/6] Added 'AUX' to the split_on list for spacy affixes (#64)

---
 src/rantanplan/pipeline.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/rantanplan/pipeline.py b/src/rantanplan/pipeline.py
index 2e659f1..09c6a01 100644
--- a/src/rantanplan/pipeline.py
+++ b/src/rantanplan/pipeline.py
@@ -46,7 +46,7 @@ def load_pipeline(lang=None, split_affixes=True):
             nlp.remove_pipe("affixes") if nlp.has_pipe("affixes") else None
             suffixes = {k: v for k, v in load_affixes().items() if
                         k.startswith(AFFIXES_SUFFIX)}
-            affixes_matcher = AffixesMatcher(nlp, split_on=["VERB"],
+            affixes_matcher = AffixesMatcher(nlp, split_on=["VERB", "AUX"],
                                              rules=suffixes)
             nlp.add_pipe(affixes_matcher, name="affixes", first=True)
         _load_pipeline[lang] = nlp

From 9251220f9c14dfaec1f31df522d7235fc94148f8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=81lvaro=20P=C3=A9rez?= <alvaro.perez.pozo@gmail.com>
Date: Wed, 18 Dec 2019 13:49:26 +0100
Subject: [PATCH 3/6] =?UTF-8?q?Bump=20version:=200.4.0=20=E2=86=92=200.4.1?=
 =?UTF-8?q?=20(#66)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 setup.cfg                  | 2 +-
 setup.py                   | 2 +-
 src/rantanplan/__init__.py | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/setup.cfg b/setup.cfg
index 205706e..fc34ee2 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.4.0
+current_version = 0.4.1
 commit = True
 tag = True
 
diff --git a/setup.py b/setup.py
index 3063eb9..e865b96 100644
--- a/setup.py
+++ b/setup.py
@@ -41,7 +41,7 @@ def read(*names, **kwargs):
 
 setup(
     name='rantanplan',
-    version='0.4.0',
+    version='0.4.1',
     license='Apache Software License 2.0',
     description='Scansion tool for Spanish texts',
     long_description='%s\n%s' % (
diff --git a/src/rantanplan/__init__.py b/src/rantanplan/__init__.py
index ca16eef..0694a51 100644
--- a/src/rantanplan/__init__.py
+++ b/src/rantanplan/__init__.py
@@ -1,2 +1,2 @@
-__version__ = '0.4.0'
+__version__ = '0.4.1'
 from .core import get_scansion  # noqa

From 6547aa5b90bcf86eb0a81af3ea9fd890e37a046c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=81lvaro=20P=C3=A9rez?= <alvaro.perez.pozo@gmail.com>
Date: Thu, 12 Mar 2020 12:11:13 +0100
Subject: [PATCH 4/6] Add documentation (#68)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Added documentation

* Changed imports

* Bump version: 0.4.1 → 0.4.2

* Added usage

* Fixed requirements

* Unified syllabification variables and functions

* Added output example
---
 CHANGELOG.rst                                 |  51 +++
 README.rst                                    | 101 +++++-
 docs/conf.py                                  |   2 +-
 docs/reference/rantanplan.rst                 |   2 +-
 docs/usage.rst                                |  93 +++++-
 requirements.txt                              |   4 +-
 setup.cfg                                     |   2 +-
 setup.py                                      |   2 +-
 src/rantanplan/__init__.py                    |   2 +-
 src/rantanplan/core.py                        | 297 +++++++-----------
 ..._syllabification.py => syllabification.py} | 120 +++++++
 11 files changed, 473 insertions(+), 203 deletions(-)
 rename src/rantanplan/{alternative_syllabification.py => syllabification.py} (94%)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 37a7f2c..49ce68f 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -2,6 +2,57 @@
 Changelog
 =========
 
+0.4.2 (2020-03-11)
+------------------
+
+* Added documentation
+
+0.4.1 (2019-12-19)
+------------------
+
+* Added 'AUX' to the split_on list for spacy affixes
+* Fixed syllabification exceptions, support for disabling/enabling spacy_affixes
+* Fixed multiline break
+* Fixed split verb stresses and secondary stress on '-mente' adverbs
+* Fixed some issues
+* Added minimum length for '-mente' adverbs
+
+0.4.0 (2019-11-21)
+------------------
+
+* Added SpaCy Doc input support
+* Add umlaut hiatus
+* Added new hiatus and fixed init
+* Refactoring code
+* Feat/new syllabification
+* Naming conventions
+* Adding rhyme analysis to scansion output
+* Adding 'singleton' behaviour to load_pipeline
+* Metre analysis w/ sinaeresis and synalephas
+* Added new workflow for syllabification, with tests
+* Post syllabification rules regexes
+* Added unit tests for all functions
+
+0.3.0 (2019-06-18)
+------------------
+
+* Added SpaCy Doc input support
+* Add umlaut hiatus
+* Fixed syllabification errors, affixes and the pipeline
+* Fixed hyphenator for diphthongs with u umlaut
+* Added hyphenation for explicit hiatus with umlaut vowels
+* Added new hiatus and fixed __init__
+
+0.2.0 (2019-06-14)
+------------------
+
+* Better hyphenator, and affixes and pipeline fixes
+
+0.1.2 (2019-06-10)
+------------------
+
+* Republishing on Pypi
+
 0.1.0 (2019-07-03)
 ------------------
 
diff --git a/README.rst b/README.rst
index dd4c9d5..bf40230 100644
--- a/README.rst
+++ b/README.rst
@@ -39,9 +39,9 @@ Overview
     :alt: PyPI Package latest release
     :target: https://pypi.org/project/rantanplan
 
-.. |commits-since| image:: https://img.shields.io/github/commits-since/linhd-postdata/rantanplan/v0.1.0.svg
+.. |commits-since| image:: https://img.shields.io/github/commits-since/linhd-postdata/rantanplan/0.4.2.svg
     :alt: Commits since latest release
-    :target: https://github.com/linhd-postdata/rantanplan/compare/v0.1.0...master
+    :target: https://github.com/linhd-postdata/rantanplan/compare/0.4.2...master
 
 .. |wheel| image:: https://img.shields.io/pypi/wheel/rantanplan.svg
     :alt: PyPI Wheel
@@ -69,6 +69,103 @@ Installation
 
     pip install rantanplan
 
+Usage
+=====
+
+Install required resources
+--------------------------
+
+#. Install the spaCy language model for Spanish::
+
+        python -m spacy download es_core_news_md
+
+#. Install Freeling rules for affixes::
+
+        python -m spacy_affixes download es
+
+
+Import rantanplan
+-----------------
+
+To use rantanplan in a project::
+
+        import rantanplan
+
+Usage example
+-------------
+.. code-block:: python
+
+    from rantanplan.core import get_scansion
+    
+    poem = """Me gustas cuando callas porque estás como ausente,
+    y me oyes desde lejos, y mi voz no te toca.
+    Parece que los ojos se te hubieran volado
+    y parece que un beso te cerrara la boca.
+
+    Como todas las cosas están llenas de mi alma
+    emerges de las cosas, llena del alma mía.
+    Mariposa de sueño, te pareces a mi alma,
+    y te pareces a la palabra melancolía."""
+    
+    get_scansion(poem)
+
+Output example
+--------------
+
+.. code-block:: python
+
+    [{'tokens': [{'word': [{'syllable': 'Me',
+      'is_stressed': False,
+      'is_word_end': True}],
+    'stress_position': 0},
+   {'word': [{'syllable': 'gus', 'is_stressed': True},
+     {'syllable': 'tas', 'is_stressed': False, 'is_word_end': True}],
+    'stress_position': -2},
+   {'word': [{'syllable': 'cuan', 'is_stressed': False},
+     {'syllable': 'do', 'is_stressed': False, 'is_word_end': True}],
+    'stress_position': 0},
+   {'word': [{'syllable': 'ca', 'is_stressed': True},
+     {'syllable': 'llas', 'is_stressed': False, 'is_word_end': True}],
+    'stress_position': -2},
+   {'word': [{'syllable': 'por', 'is_stressed': False},
+     {'syllable': 'que',
+      'is_stressed': False,
+      'has_synalepha': True,
+      'is_word_end': True}],
+    'stress_position': 0},
+   {'word': [{'syllable': 'es', 'is_stressed': False},
+     {'syllable': 'tás', 'is_stressed': True, 'is_word_end': True}],
+    'stress_position': -1},
+   {'word': [{'syllable': 'co', 'is_stressed': False},
+     {'syllable': 'mo',
+      'is_stressed': False,
+      'has_synalepha': True,
+      'is_word_end': True}],
+    'stress_position': 0},
+   {'word': [{'syllable': 'au', 'is_stressed': False},
+     {'syllable': 'sen', 'is_stressed': True},
+     {'syllable': 'te', 'is_stressed': False, 'is_word_end': True}],
+    'stress_position': -2},
+   {'symbol': ','}],
+  'phonological_groups': [{'syllable': 'Me',
+    'is_stressed': False,
+    'is_word_end': True},
+   {'syllable': 'gus', 'is_stressed': True},
+   {'syllable': 'tas', 'is_stressed': False, 'is_word_end': True},
+   {'syllable': 'cuan', 'is_stressed': False},
+   {'syllable': 'do', 'is_stressed': False, 'is_word_end': True},
+   {'syllable': 'ca', 'is_stressed': True},
+   {'syllable': 'llas', 'is_stressed': False, 'is_word_end': True},
+   {'syllable': 'por', 'is_stressed': False},
+   {'syllable': 'quees', 'is_stressed': False, 'synalepha_index': [2]},
+   {'syllable': 'tás', 'is_stressed': True, 'is_word_end': True},
+   {'syllable': 'co', 'is_stressed': False},
+   {'syllable': 'moau', 'is_stressed': False, 'synalepha_index': [1]},
+   {'syllable': 'sen', 'is_stressed': True},
+   {'syllable': 'te', 'is_stressed': False, 'is_word_end': True}],
+  'rhythm': {'stress': '-+---+---+--+-', 'type': 'pattern', 'length': 14}},
+   ...
+
 Documentation
 =============
 
diff --git a/docs/conf.py b/docs/conf.py
index 43d5926..d26561d 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -26,7 +26,7 @@
 year = '2019'
 author = 'LINHD POSTDATA Project'
 copyright = '{0}, {1}'.format(year, author)
-version = release = '0.1.0'
+version = release = '0.4.2'
 
 pygments_style = 'trac'
 templates_path = ['.']
diff --git a/docs/reference/rantanplan.rst b/docs/reference/rantanplan.rst
index e63ebb4..f583606 100644
--- a/docs/reference/rantanplan.rst
+++ b/docs/reference/rantanplan.rst
@@ -5,5 +5,5 @@ rantanplan
 
     from rantanplan import *
 
-.. automodule:: rantanplan
+.. automodule:: rantanplan.core
     :members:
diff --git a/docs/usage.rst b/docs/usage.rst
index 9673afb..f7a6aae 100644
--- a/docs/usage.rst
+++ b/docs/usage.rst
@@ -1,7 +1,96 @@
-=====
 Usage
 =====
 
+Install required resources
+--------------------------
+
+#. Install the spaCy language model for Spanish::
+
+        python -m spacy download es_core_news_md
+
+#. Install Freeling rules for affixes::
+
+        python -m spacy_affixes download es
+
+
+Import rantanplan
+-----------------
+
 To use rantanplan in a project::
 
-	import rantanplan
+        import rantanplan
+
+Usage example
+-------------
+.. code-block:: python
+
+    from rantanplan.core import get_scansion
+    
+    poem = """Me gustas cuando callas porque estás como ausente,
+    y me oyes desde lejos, y mi voz no te toca.
+    Parece que los ojos se te hubieran volado
+    y parece que un beso te cerrara la boca.
+
+    Como todas las cosas están llenas de mi alma
+    emerges de las cosas, llena del alma mía.
+    Mariposa de sueño, te pareces a mi alma,
+    y te pareces a la palabra melancolía."""
+    
+    get_scansion(poem)
+
+Output example
+--------------
+
+.. code-block:: python
+
+    [{'tokens': [{'word': [{'syllable': 'Me',
+      'is_stressed': False,
+      'is_word_end': True}],
+    'stress_position': 0},
+   {'word': [{'syllable': 'gus', 'is_stressed': True},
+     {'syllable': 'tas', 'is_stressed': False, 'is_word_end': True}],
+    'stress_position': -2},
+   {'word': [{'syllable': 'cuan', 'is_stressed': False},
+     {'syllable': 'do', 'is_stressed': False, 'is_word_end': True}],
+    'stress_position': 0},
+   {'word': [{'syllable': 'ca', 'is_stressed': True},
+     {'syllable': 'llas', 'is_stressed': False, 'is_word_end': True}],
+    'stress_position': -2},
+   {'word': [{'syllable': 'por', 'is_stressed': False},
+     {'syllable': 'que',
+      'is_stressed': False,
+      'has_synalepha': True,
+      'is_word_end': True}],
+    'stress_position': 0},
+   {'word': [{'syllable': 'es', 'is_stressed': False},
+     {'syllable': 'tás', 'is_stressed': True, 'is_word_end': True}],
+    'stress_position': -1},
+   {'word': [{'syllable': 'co', 'is_stressed': False},
+     {'syllable': 'mo',
+      'is_stressed': False,
+      'has_synalepha': True,
+      'is_word_end': True}],
+    'stress_position': 0},
+   {'word': [{'syllable': 'au', 'is_stressed': False},
+     {'syllable': 'sen', 'is_stressed': True},
+     {'syllable': 'te', 'is_stressed': False, 'is_word_end': True}],
+    'stress_position': -2},
+   {'symbol': ','}],
+  'phonological_groups': [{'syllable': 'Me',
+    'is_stressed': False,
+    'is_word_end': True},
+   {'syllable': 'gus', 'is_stressed': True},
+   {'syllable': 'tas', 'is_stressed': False, 'is_word_end': True},
+   {'syllable': 'cuan', 'is_stressed': False},
+   {'syllable': 'do', 'is_stressed': False, 'is_word_end': True},
+   {'syllable': 'ca', 'is_stressed': True},
+   {'syllable': 'llas', 'is_stressed': False, 'is_word_end': True},
+   {'syllable': 'por', 'is_stressed': False},
+   {'syllable': 'quees', 'is_stressed': False, 'synalepha_index': [2]},
+   {'syllable': 'tás', 'is_stressed': True, 'is_word_end': True},
+   {'syllable': 'co', 'is_stressed': False},
+   {'syllable': 'moau', 'is_stressed': False, 'synalepha_index': [1]},
+   {'syllable': 'sen', 'is_stressed': True},
+   {'syllable': 'te', 'is_stressed': False, 'is_word_end': True}],
+  'rhythm': {'stress': '-+---+---+--+-', 'type': 'pattern', 'length': 14}},
+   ...
diff --git a/requirements.txt b/requirements.txt
index ce477bc..64e65ff 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,3 @@
 Click>=7.0
-spacy>=2.1
-spacy_affixes
\ No newline at end of file
+spacy>=2.2
+spacy_affixes
diff --git a/setup.cfg b/setup.cfg
index fc34ee2..95ca53b 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.4.1
+current_version = 0.4.2
 commit = True
 tag = True
 
diff --git a/setup.py b/setup.py
index e865b96..ac04a19 100644
--- a/setup.py
+++ b/setup.py
@@ -41,7 +41,7 @@ def read(*names, **kwargs):
 
 setup(
     name='rantanplan',
-    version='0.4.1',
+    version='0.4.2',
     license='Apache Software License 2.0',
     description='Scansion tool for Spanish texts',
     long_description='%s\n%s' % (
diff --git a/src/rantanplan/__init__.py b/src/rantanplan/__init__.py
index 0694a51..3907fa6 100644
--- a/src/rantanplan/__init__.py
+++ b/src/rantanplan/__init__.py
@@ -1,2 +1,2 @@
-__version__ = '0.4.1'
+__version__ = '0.4.2'
 from .core import get_scansion  # noqa
diff --git a/src/rantanplan/core.py b/src/rantanplan/core.py
index e1c9cd2..becc165 100644
--- a/src/rantanplan/core.py
+++ b/src/rantanplan/core.py
@@ -13,143 +13,44 @@
 
 from spacy.tokens import Doc
 
-from .alternative_syllabification import ALTERNATIVE_SYLLABIFICATION
-from .alternative_syllabification import SYLLABIFICATOR_FOREIGN_WORDS_DICT
 from .pipeline import load_pipeline
 from .rhymes import STRUCTURES_LENGTH
 from .rhymes import analyze_rhyme
-
-"""
-Syllabification
-"""
-accents_re = re.compile("[áéíóú]", re.I | re.U)
-paroxytone_re = re.compile("([aeiou]|n|[aeiou]s)$",
-                           # checks if a str ends in unaccented vowel/N/S
-                           re.I | re.U)
-
-"""
-Regular expressions for spanish syllabification.
-For the 'tl' cluster we have decided to join the two letters
-because is the most common syllabification and the same that
-Perkins (http://sadowsky.cl/perkins.html), DIRAE (https://dirae.es/),
-and Educalingo (https://educalingo.com/es/dic-es) use.
-"""
-letter_clusters_re = re.compile(r"""
-    # 1: weak vowels diphthong with h
-    ([iuü]h[iuü])|
-    # 2: open vowels
-    ([aáeéíoóú]h[iuü])|
-    # 3: closed vowels
-    ([iuü]h[aáeéíoóú])|
-    # 4: liquid and mute consonants (adds hyphen)
-    ([a-záéíóúñ](?:(?:[bcdfghjklmnñpqstvy][hlr])|
-    (?:[bcdfghjklmnñpqrstvy][hr])|
-    (?:[bcdfghjklmnñpqrstvyz][h]))[aáeéiíoóuúü])|
-    # 5: any char followed by liquid and mute consonant,
-    # exceptions for 'r+l' and 't+l'
-    ((?:(?:[bcdfghjklmnñpqstvy][hlr])|
-    (?:[bcdfghjklmnñpqrstvy][hr])|
-    (?:[bcdfghjklmnñpqrstvyz][h]))[aáeéiíoóuúü])|
-    # 6: non-liquid consonant (adds hyphen)
-    ([a-záéíóúñ][bcdfghjklmnñpqrstvxyz][aáeéiíoóuúüï])|
-    # 7: vowel group (adds hyphen)
-    ([aáeéíoóú][aáeéíoóú])|
-    # 8: umlaut 'u' diphthongs
-    (ü[iíaeo])|
-    # 9: Explicit hiatus with umlaut vowels, first part
-    ([aeiou][äëïöü])|
-    #10: Explicit hiatus with umlaut vowels, second part
-    ([üäëïö][a-z])|
-    #11: any char
-    ([a-záéíóúñ])""", re.I | re.U | re.VERBOSE)  # VERBOSE to catch the group
-
-"""
-Rhythmical Analysis
-"""
-SPACE = "SPACE"
-STRONG_VOWELS = set("aeoáéóÁÉÓAEO")
-WEAK_VOWELS = set("iuüíúIÍUÜÚ")
-LIAISON_FIRST_PART = set("aeiouáéíóúAEIOUÁÉÍÓÚyY")
-LIAISON_SECOND_PART = set("aeiouáéíóúAEIOUÁÉÍÓÚhyYH")
-
-STRESSED_UNACCENTED_MONOSYLLABLES = {"yo", "vio", "dio", "fe", "sol", "ti",
-                                     "un"}
-
-UNSTRESSED_UNACCENTED_MONOSYLLABLES = {'de', 'el', 'la', 'las', 'le', 'les',
-                                       'lo', 'los',
-                                       'mas', 'me', 'mi', 'nos', 'os', 'que',
-                                       'se', 'si',
-                                       'su', 'tan', 'te', 'tu', "tus", "oh"}
-
-UNSTRESSED_FORMS = {"ay", "don", "doña", "aun", "que", "cual", "quien", "donde",
-                    "cuando", "cuanto", "como", "cuantas", "cuantos"}
-
-STRESSED_PRON = {"mío", "mía", "míos", "mías", "tuyo", "tuya", "tuyos",
-                 "tuyas", "suyo", "suya", "suyos", "suyas", "todo"}
-
-POSSESSIVE_PRON_UNSTRESSED = {"nuestro", "nuestra", "nuestros", "nuestras",
-                              "vuestro", "vuestra", "vuestros", "vuestras"}
-
-"""
-Regular expressions and rules for syllabification exceptions
-"""
-
-# Words starting with prefixes SIN-/DES- followed by consonant "destituir"
-PREFIX_DES_WITH_CONSONANT_RE = (
-    re.compile("^(des)([bcdfgjklmhnñpqrstvxyz].*)", re.I | re.U))
-
-# Words starting with prefixes SIN-/DES- followed by consonant "sinhueso"
-PREFIX_SIN_WITH_CONSONANT_RE = (
-    re.compile("^(sin)([bcdfgjklmhnñpqrstvxyz].*)", re.I | re.U))
-
-# Group consonant+[hlr] with exceptions for ll
-CONSONANT_GROUP = (re.compile("(.*[hmnqsw])([hlr][aeiouáéíóú].*)", re.I | re.U))
-CONSONANT_GROUP_EXCEPTION_LL = (
-    re.compile("(.*[hlmnqsw])([hr][aeiouáéíóú].*)", re.I | re.U))
-CONSONANT_GROUP_EXCEPTION_DL = (
-    re.compile("(.*[d])([l][aeiouáéíóú].*)", re.I | re.U))
-
-# Group vowel+ w + vowel
-W_VOWEL_GROUP = (re.compile("(.*[aeiouáéíóú])(w[aeiouáéíóú].*)", re.I | re.U))
-
-# Post-syllabification exceptions for consonant clusters and diphthongs
-# Explicitit hiatus on first vowel
-HIATUS_FIRST_VOWEL_RE = (re.compile(
-    "(?:(.*-)|^)([äëïö]|[^g]ü)([aeiouúáéíó].*)",
-    re.I | re.U | re.VERBOSE))
-
-# Consonant cluster. Example: 'cneorácea'
-CONSONANT_CLUSTER_RE = (re.compile(
-    "(?:(.*-)|^)([mpgc])-([bcdfghjklmñnpqrstvwxyz][aeioáéíó].*)",
-    re.I | re.U | re.VERBOSE))
-
-# Lowering diphthong. Example: 'ahijador'
-LOWERING_DIPHTHONGS_WITH_H = (
-    re.compile(
-        """((?:.*-|^)(?:qu|[bcdfghjklmñnpqrstvwxyz]+)?)
-        ([aeo])-(h[iu](?![aeoiuíúáéó]).*)""",
-        re.I | re.U | re.VERBOSE))
-
-# Lowering diphthong. Example: 'buhitiho'
-RAISING_DIPHTHONGS_WITH_H = (
-    re.compile(
-        """((?:.*-|^)(?:qu|[bcdfghjklmñnpqrstvwxyz]+)?)
-        ([iu])-(h[aeiouáéó](?![aeoáéiuíú]).*)""",
-        re.I | re.U | re.VERBOSE))
-
-"""
-Rhythmical Analysis functions
-"""
+from .syllabification import ALTERNATIVE_SYLLABIFICATION
+from .syllabification import CONSONANT_CLUSTER_RE
+from .syllabification import CONSONANT_GROUP
+from .syllabification import CONSONANT_GROUP_EXCEPTION_DL
+from .syllabification import CONSONANT_GROUP_EXCEPTION_LL
+from .syllabification import HIATUS_FIRST_VOWEL_RE
+from .syllabification import LIAISON_FIRST_PART
+from .syllabification import LIAISON_SECOND_PART
+from .syllabification import LOWERING_DIPHTHONGS_WITH_H
+from .syllabification import POSSESSIVE_PRON_UNSTRESSED
+from .syllabification import PREFIX_DES_WITH_CONSONANT_RE
+from .syllabification import PREFIX_SIN_WITH_CONSONANT_RE
+from .syllabification import RAISING_DIPHTHONGS_WITH_H
+from .syllabification import SPACE
+from .syllabification import STRESSED_PRON
+from .syllabification import STRESSED_UNACCENTED_MONOSYLLABLES
+from .syllabification import STRONG_VOWELS
+from .syllabification import SYLLABIFICATOR_FOREIGN_WORDS_DICT
+from .syllabification import UNSTRESSED_FORMS
+from .syllabification import UNSTRESSED_UNACCENTED_MONOSYLLABLES
+from .syllabification import W_VOWEL_GROUP
+from .syllabification import WEAK_VOWELS
+from .syllabification import accents_re
+from .syllabification import letter_clusters_re
+from .syllabification import paroxytone_re
 
 
 def have_prosodic_liaison(first_syllable, second_syllable):
-    """
-    Checkfor prosodic liaison between two syllables
-    :param first_syllable: dic with key syllable (str) and is_stressed (bool)
-                           representing the first syllable
-    :param second_syllable: dic with key syllable (str) and is_stressed (bool)
-                            representing the second syllable
-    :return: True if there is prosodic liaison and False otherwise
+    """Checks for prosodic liaison between two syllables
+
+    :param first_syllable: Dictionary with key syllable (str) and is_stressed (bool) representing
+        the first syllable
+    :param second_syllable: Dictionary with key syllable (str) and is_stressed (bool)
+        representing the second syllable
+    :return: `True` if there is prosodic liaison and `False` otherwise
     :rtype: bool
     """
     if second_syllable['syllable'][0].lower() == 'y' and (
@@ -162,10 +63,11 @@ def have_prosodic_liaison(first_syllable, second_syllable):
 
 
 def get_syllables_word_end(words):
-    """
-    Get a list of syllables from a list of words extracting word boundaries
+    """Get a list of syllables from a list of words extracting word boundaries
+
     :param words: List of dictonaries of syllables for each word in a line
     :return: List of dictionaries of syllables with an extra is_word_end key
+    :rtype: list
     """
     syllables = []
     for word in words:
@@ -180,17 +82,18 @@ def get_syllables_word_end(words):
 
 def get_phonological_groups(word_syllables, liaison_type="synalepha",
                             breakage_func=None, liaison_positions=None):
-    """
-    Get a list of dictionaries for each phonological group on a line
+    """Get a list of dictionaries for each phonological group on a line
     and joins the syllables to create phonological groups (pronounced together)
     according to a type of liaison, either synaloepha or sinaeresis
+
     :param word_syllables: List of dictionaries for each word of the line
     :param liaison_type: Which liaison is going to be performed synalepha or
-                         sinaeresis
+        sinaeresis
     :param breakage_func: Function to decide when not to break a liaison that is
-    specified in liaison_positions
+        specified in liaison_positions
     :param liaison_positions: Positions of the liaisons
     :return: A list of conjoined syllables
+    :rtype: list
     """
     syllables = word_syllables[:]
     liaison_property = f"has_{liaison_type}"
@@ -240,13 +143,14 @@ def get_phonological_groups(word_syllables, liaison_type="synalepha",
 
 
 def clean_phonological_groups(groups, liaison_positions, liaison_property):
-    """
-    Clean phonological groups so their liaison property is consistently set
+    """Clean phonological groups so their liaison property is consistently set
     according to the the liaison positions
+
     :param groups: Phonological groups to be cleaned
     :param liaison_positions: Positions of the liaisons
     :param liaison_property: The liaison type (synaeresis or synalepha)
-    :return:
+    :return: Cleaned phonological groups
+    :rtype: list
     """
     clean_groups = []
     for idx, group in enumerate(groups):
@@ -260,12 +164,13 @@ def clean_phonological_groups(groups, liaison_positions, liaison_property):
 
 
 def get_rhythmical_pattern(phonological_groups, rhythm_format="pattern"):
-    """
-    Gets a rhythm pattern for a poem in either "pattern": "-++-+-+-"
+    """Gets a rhythm pattern for a poem in either "pattern": "-++-+-+-"
     "binary": "01101010" or "indexed": [1,2,4,6] format
+
     :param phonological_groups: a dictionary with the syllables of the line
     :param rhythm_format: The output format for the rhythm
     :return: Dictionary with with rhythm and phonologic groups
+    :rtype: dict
     """
     stresses = get_stresses(phonological_groups)
     stress = format_stress(stresses, rhythm_format)
@@ -277,15 +182,16 @@ def get_rhythmical_pattern(phonological_groups, rhythm_format="pattern"):
 
 
 def get_stresses(phonological_groups):
-    """
-    Gets a list of stress marks (True for stressed, False for unstressed) from a
-    list of phonological groups applying rules depending on the ending stress.
+    """Gets a list of stress marks (`True` for stressed, `False` for unstressed)
+    from a list of phonological groups applying rules depending on the ending
+    stress.
+
     :param phonological_groups: a dictionary with the phonological groups
-    (syllables) of the line
+        (syllables) of the line
     :return: List of boolean values indicating whether a group is
-    stressed (True) or not (False)
+        stressed (`True`) or not (`False`)
+    :rtype: list
     """
-    # stresses = [group["is_stressed"] for group in phonological_groups]
     stresses = []
     last_word_syllables = []
     for group in phonological_groups:
@@ -312,16 +218,17 @@ def get_stresses(phonological_groups):
 
 
 def format_stress(stresses, rhythm_format="pattern", indexed_separator="-"):
-    """
-    Converts a list of boolean elements into a string that matches the chosen
-    rhythm format:
-                "indexed": 2,5,8
-                "pattern": -++--+-+-
-                "binary": 01101001
+    """Converts a list of boolean elements into a string that matches the chosen
+        rhythm format:
+        "indexed": 2,5,8
+        "pattern": -++--+-+-
+        "binary": 01101001
+
     :param stresses: List of boolean elements representing stressed syllables
     :param rhythm_format: Format to be used: indexed, pattern, or binary
     :param indexed_separator: String to use as a separator for indexed pattern
     :return: String with the stress pattern
+    :rtype: str
     """
     separator = ""
     if rhythm_format == 'indexed':
@@ -342,11 +249,12 @@ def format_stress(stresses, rhythm_format="pattern", indexed_separator="-"):
 
 
 def apply_exception_rules(word):
-    """
-    Applies presyllabification rules to a word,
+    """Applies presyllabification rules to a word,
     based on Antonio Ríos Mestre's work
+
     :param word: A string to be checked for exceptions
     :return: A string with the presyllabified word
+    :rtype: str
     """
     # Vowel + w + vowel group
     if W_VOWEL_GROUP.match(word):
@@ -380,11 +288,12 @@ def apply_exception_rules(word):
 
 
 def apply_exception_rules_post(word):
-    """
-    Applies presyllabification rules to a word,
+    """Applies presyllabification rules to a word,
     based on Antonio Ríos Mestre's work
+
     :param word: A string to be checked for exceptions
     :return: A string with the presyllabified word with hyphens
+    :rtype: str
     """
     # We make one pass for every match found so we can perform
     # several substitutions
@@ -403,12 +312,12 @@ def apply_exception_rules_post(word):
 
 
 def syllabify(word, alternative_syllabification=False):
-    """
-    Syllabifies a word.
+    """Syllabifies a word.
+
     :param word: The word to be syllabified.
     :param alternative_syllabification: Wether or not the alternative
-    syllabification is used
-    :return: list of syllables and exceptions where appropriate.
+        syllabification is used
+    :return: List of syllables and exceptions where appropriate.
     :rtype: list
     """
     output = ""
@@ -438,10 +347,10 @@ def syllabify(word, alternative_syllabification=False):
 
 
 def get_orthographic_accent(syllable_list):
-    """
-    Given a list of str representing syllables,
+    """Given a list of str representing syllables,
     return position in the list of a syllable bearing
     orthographic stress (with the acute accent mark in Spanish)
+
     :param syllable_list: list of syllables as str or unicode each
     :return: Position or None if no orthographic stress
     :rtype: int
@@ -456,11 +365,11 @@ def get_orthographic_accent(syllable_list):
 
 
 def is_paroxytone(syllables):
-    """
-    Given a list of str representing syllables from a single word,
+    """Given a list of str representing syllables from a single word,
     check if it is paroxytonic (llana) or not
+
     :param syllables: List of syllables as str
-    :return: True if paroxytone, False if not
+    :return: `True` if paroxytone, `False` if not
     :rtype: bool
     """
     if not get_orthographic_accent("".join(syllables)):
@@ -469,12 +378,13 @@ def is_paroxytone(syllables):
 
 
 def spacy_tag_to_dict(tag):
-    """
-    Creater a dict from spacy pos tags
+    """Creates a dict from spacy pos tags
+
     :param tag: Extended spacy pos tag
-    ("Definite=Ind|Gender=Masc|Number=Sing|PronType=Art")
+        ("Definite=Ind|Gender=Masc|Number=Sing|PronType=Art")
     :return: A dictionary in the form of
-    "{'Definite': 'Ind', 'Gender': 'Masc', 'Number': 'Sing', 'PronType': 'Art'}"
+        "{'Definite': 'Ind', 'Gender': 'Masc', 'Number': 'Sing',
+        'PronType': 'Art'}"
     :rtype: dict
     """
     if tag and '=' in tag:
@@ -484,18 +394,17 @@ def spacy_tag_to_dict(tag):
 
 
 def get_word_stress(word, pos, tag, alternative_syllabification=False):
-    """
-    Gets a list of syllables from a word and creates a list with syllabified
+    """Gets a list of syllables from a word and creates a list with syllabified
     word and stressed syllable index
+
     :param word: Word string
     :param alternative_syllabification: Wether or not the alternative
-    syllabification is used
+        syllabification is used
     :param pos: PoS tag from spacy ("DET")
     :param tag: Extended PoS tag info from spacy
-    ("Definite=Ind|Gender=Masc|Number=Sing|PronType=Art")
+        ("Definite=Ind|Gender=Masc|Number=Sing|PronType=Art")
     :return: Dict with [original syllab word, stressed syllabified word,
-    negative index position of stressed syllable or 0
-    if not stressed]
+        negative index position of stressed syllable or 0 if not stressed]
     :rtype: dict
     """
     syllable_list, _ = syllabify(word, alternative_syllabification)
@@ -593,10 +502,11 @@ def get_word_stress(word, pos, tag, alternative_syllabification=False):
 
 
 def get_last_syllable(token_list):
-    """
-    Gets last syllable from a word in a dictionary
+    """Gets last syllable from a word in a dictionary
+
     :param token_list: list of dictionaries with line tokens
     :return: Last syllable
+    :rtype: str
     """
     if len(token_list) > 0:
         for token in token_list[::-1]:
@@ -605,14 +515,14 @@ def get_last_syllable(token_list):
 
 
 def get_words(word_list, alternative_syllabification=False):
-    """
-    Gets a list of syllables from a word and creates a list with syllabified
+    """Gets a list of syllables from a word and creates a list with syllabified
     word and stressed syllabe index
+
     :param word_list: List of spacy objects representing a word or sentence
     :param alternative_syllabification: Wether or not the alternative
-    syllabification is used
+        syllabification is used
     :return: List with [original syllab. word, stressed syllab. word, negative
-    index position of stressed syllable]
+        index position of stressed syllable]
     :rtype: list
     """
     syllabified_words = []
@@ -647,10 +557,11 @@ def get_words(word_list, alternative_syllabification=False):
 
 
 def join_affixes(line):
-    """
-    Join affixes of split words and recalculates stress
+    """Join affixes of split words and recalculates stress
+
     :param line: List of syllabified words (dict)
     :return: List of syllabified words (dict) with joined affixes
+    :rtype: list
     """
     syllabified_words = []
     indices_to_ignore = []
@@ -675,13 +586,13 @@ def join_affixes(line):
 
 def get_scansion(text, rhyme_analysis=False, rhythm_format="pattern",
                  rhythmical_lengths=None):
-    """
-    Generates a list of dictionaries for each line
+    """Generates a list of dictionaries for each line
+
     :param text: Full text to be analyzed
     :param rhyme_analysis: Specify if rhyme analysis is to be performed
     :param rhythm_format: output format for rhythm analysis
     :param rhythmical_lengths: List with explicit rhythmical lengths per line
-    that the analysed lines has to meet
+        that the analysed lines has to meet
     :return: list of dictionaries per line
     :rtype: list
     """
@@ -760,10 +671,11 @@ def break_on_h(liaison_type, syllable_left, syllable_right):
 
 
 def generate_phonological_groups(tokens):
-    """
-    Generates phonological groups from a list of tokens
+    """Generates phonological groups from a list of tokens
+
     :param tokens: list of spaCy tokens
     :return: Generator with a list of phonological groups
+    :rtype: generator
     """
     for alternative_syllabification in (True, False):
         words = get_words(tokens, alternative_syllabification)
@@ -799,11 +711,12 @@ def generate_phonological_groups(tokens):
 
 
 def generate_liaison_positions(syllables, liaison):
-    """
-    Generates all possible combinations for the liaisons on a list of syllables
+    """Generates all possible combinations for the liaisons on a list of syllables
+
     :param syllables: List of syllables with
     :param liaison: Type of liaison combination to be generated
     :return: Generator with a list of possible combinations
+    :rtype: generator
     """
     positions = [int(syllable.get(f"has_{liaison}", 0))
                  for syllable in syllables]
diff --git a/src/rantanplan/alternative_syllabification.py b/src/rantanplan/syllabification.py
similarity index 94%
rename from src/rantanplan/alternative_syllabification.py
rename to src/rantanplan/syllabification.py
index f1d0bde..38a3281 100644
--- a/src/rantanplan/alternative_syllabification.py
+++ b/src/rantanplan/syllabification.py
@@ -1,3 +1,123 @@
+import re
+
+"""
+Syllabification
+"""
+accents_re = re.compile("[áéíóú]", re.I | re.U)
+paroxytone_re = re.compile("([aeiou]|n|[aeiou]s)$",
+                           # checks if a str ends in unaccented vowel/N/S
+                           re.I | re.U)
+
+"""
+Regular expressions for spanish syllabification.
+For the 'tl' cluster we have decided to join the two letters
+because is the most common syllabification and the same that
+Perkins (http://sadowsky.cl/perkins.html), DIRAE (https://dirae.es/),
+and Educalingo (https://educalingo.com/es/dic-es) use.
+"""
+letter_clusters_re = re.compile(r"""
+    # 1: weak vowels diphthong with h
+    ([iuü]h[iuü])|
+    # 2: open vowels
+    ([aáeéíoóú]h[iuü])|
+    # 3: closed vowels
+    ([iuü]h[aáeéíoóú])|
+    # 4: liquid and mute consonants (adds hyphen)
+    ([a-záéíóúñ](?:(?:[bcdfghjklmnñpqstvy][hlr])|
+    (?:[bcdfghjklmnñpqrstvy][hr])|
+    (?:[bcdfghjklmnñpqrstvyz][h]))[aáeéiíoóuúü])|
+    # 5: any char followed by liquid and mute consonant,
+    # exceptions for 'r+l' and 't+l'
+    ((?:(?:[bcdfghjklmnñpqstvy][hlr])|
+    (?:[bcdfghjklmnñpqrstvy][hr])|
+    (?:[bcdfghjklmnñpqrstvyz][h]))[aáeéiíoóuúü])|
+    # 6: non-liquid consonant (adds hyphen)
+    ([a-záéíóúñ][bcdfghjklmnñpqrstvxyz][aáeéiíoóuúüï])|
+    # 7: vowel group (adds hyphen)
+    ([aáeéíoóú][aáeéíoóú])|
+    # 8: umlaut 'u' diphthongs
+    (ü[iíaeo])|
+    # 9: Explicit hiatus with umlaut vowels, first part
+    ([aeiou][äëïöü])|
+    #10: Explicit hiatus with umlaut vowels, second part
+    ([üäëïö][a-z])|
+    #11: any char
+    ([a-záéíóúñ])""", re.I | re.U | re.VERBOSE)  # VERBOSE to catch the group
+
+"""
+Rhythmical Analysis
+"""
+SPACE = "SPACE"
+STRONG_VOWELS = set("aeoáéóÁÉÓAEO")
+WEAK_VOWELS = set("iuüíúIÍUÜÚ")
+LIAISON_FIRST_PART = set("aeiouáéíóúAEIOUÁÉÍÓÚyY")
+LIAISON_SECOND_PART = set("aeiouáéíóúAEIOUÁÉÍÓÚhyYH")
+
+STRESSED_UNACCENTED_MONOSYLLABLES = {"yo", "vio", "dio", "fe", "sol", "ti",
+                                     "un"}
+
+UNSTRESSED_UNACCENTED_MONOSYLLABLES = {'de', 'el', 'la', 'las', 'le', 'les',
+                                       'lo', 'los',
+                                       'mas', 'me', 'mi', 'nos', 'os', 'que',
+                                       'se', 'si',
+                                       'su', 'tan', 'te', 'tu', "tus", "oh"}
+
+UNSTRESSED_FORMS = {"ay", "don", "doña", "aun", "que", "cual", "quien", "donde",
+                    "cuando", "cuanto", "como", "cuantas", "cuantos"}
+
+STRESSED_PRON = {"mío", "mía", "míos", "mías", "tuyo", "tuya", "tuyos",
+                 "tuyas", "suyo", "suya", "suyos", "suyas", "todo"}
+
+POSSESSIVE_PRON_UNSTRESSED = {"nuestro", "nuestra", "nuestros", "nuestras",
+                              "vuestro", "vuestra", "vuestros", "vuestras"}
+
+"""
+Regular expressions and rules for syllabification exceptions
+"""
+
+# Words starting with prefixes SIN-/DES- followed by consonant "destituir"
+PREFIX_DES_WITH_CONSONANT_RE = (
+    re.compile("^(des)([bcdfgjklmhnñpqrstvxyz].*)", re.I | re.U))
+
+# Words starting with prefixes SIN-/DES- followed by consonant "sinhueso"
+PREFIX_SIN_WITH_CONSONANT_RE = (
+    re.compile("^(sin)([bcdfgjklmhnñpqrstvxyz].*)", re.I | re.U))
+
+# Group consonant+[hlr] with exceptions for ll
+CONSONANT_GROUP = (re.compile("(.*[hmnqsw])([hlr][aeiouáéíóú].*)", re.I | re.U))
+CONSONANT_GROUP_EXCEPTION_LL = (
+    re.compile("(.*[hlmnqsw])([hr][aeiouáéíóú].*)", re.I | re.U))
+CONSONANT_GROUP_EXCEPTION_DL = (
+    re.compile("(.*[d])([l][aeiouáéíóú].*)", re.I | re.U))
+
+# Group vowel+ w + vowel
+W_VOWEL_GROUP = (re.compile("(.*[aeiouáéíóú])(w[aeiouáéíóú].*)", re.I | re.U))
+
+# Post-syllabification exceptions for consonant clusters and diphthongs
+# Explicit hiatus on first vowel
+HIATUS_FIRST_VOWEL_RE = (re.compile(
+    "(?:(.*-)|^)([äëïö]|[^g]ü)([aeiouúáéíó].*)",
+    re.I | re.U | re.VERBOSE))
+
+# Consonant cluster. Example: 'cneorácea'
+CONSONANT_CLUSTER_RE = (re.compile(
+    "(?:(.*-)|^)([mpgc])-([bcdfghjklmñnpqrstvwxyz][aeioáéíó].*)",
+    re.I | re.U | re.VERBOSE))
+
+# Lowering diphthong. Example: 'ahijador'
+LOWERING_DIPHTHONGS_WITH_H = (
+    re.compile(
+        """((?:.*-|^)(?:qu|[bcdfghjklmñnpqrstvwxyz]+)?)
+        ([aeo])-(h[iu](?![aeoiuíúáéó]).*)""",
+        re.I | re.U | re.VERBOSE))
+
+# Raising diphthong. Example: 'buhitiho'
+RAISING_DIPHTHONGS_WITH_H = (
+    re.compile(
+        """((?:.*-|^)(?:qu|[bcdfghjklmñnpqrstvwxyz]+)?)
+        ([iu])-(h[aeiouáéó](?![aeoáéiuíú]).*)""",
+        re.I | re.U | re.VERBOSE))
+
 """
 Exceptions for foreign words in Spanish that do not follow
 standard Spanish syllabification rules

From fa6e630b342b47647417e3d41182f11489a782e5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=81lvaro=20P=C3=A9rez?= <alvaro.perez.pozo@gmail.com>
Date: Tue, 24 Mar 2020 10:04:52 +0100
Subject: [PATCH 5/6] Added support for filtering consecutive liaisons and
 syllabification exceptions (#70)

* Added support for filtering consecutive liaisons and syllabification exceptions

* Added missing documentation
---
 src/rantanplan/core.py            | 20 +++++++++++++++++++-
 src/rantanplan/syllabification.py | 13 +++++++++++--
 tests/test_core.py                | 13 +++++++++++++
 3 files changed, 43 insertions(+), 3 deletions(-)

diff --git a/src/rantanplan/core.py b/src/rantanplan/core.py
index becc165..f6d99dc 100644
--- a/src/rantanplan/core.py
+++ b/src/rantanplan/core.py
@@ -725,8 +725,26 @@ def generate_liaison_positions(syllables, liaison):
     liaison_indices = [
         index for index, position in enumerate(positions) if position
     ]
+    # Prioritize single liaisons
+    non_single_liaisons = []
     for combination in combinations:
         liaison_positions = [0] * len(positions)
         for index, liaison_index in enumerate(liaison_indices):
             liaison_positions[liaison_index] = combination[index]
-        yield liaison_positions
+        if has_single_liaisons(liaison_positions):
+            yield liaison_positions
+        else:
+            non_single_liaisons.append(liaison_positions)
+    for liaison_position in non_single_liaisons:
+        yield liaison_position
+
+
+def has_single_liaisons(liaisons):
+    """Checks that liaisons (a list of 1's and 0's) has no two consecutive
+        liaisons (adjacent 1's)
+
+    :param liaisons: List of possible liaisons to apply per phonological group
+    :return: True if no consecutive liaisons, False otherwise
+    :rtype: bool
+    """
+    return not any(i == j == 1 for i, j in zip(liaisons, liaisons[1:]))
diff --git a/src/rantanplan/syllabification.py b/src/rantanplan/syllabification.py
index 38a3281..d43d523 100644
--- a/src/rantanplan/syllabification.py
+++ b/src/rantanplan/syllabification.py
@@ -60,7 +60,7 @@
                                        'lo', 'los',
                                        'mas', 'me', 'mi', 'nos', 'os', 'que',
                                        'se', 'si',
-                                       'su', 'tan', 'te', 'tu', "tus", "oh"}
+                                       'su', 'tan', 'te', 'tu', "tus", "oh", "pues"}
 
 UNSTRESSED_FORMS = {"ay", "don", "doña", "aun", "que", "cual", "quien", "donde",
                     "cuando", "cuanto", "como", "cuantas", "cuantos"}
@@ -1327,6 +1327,10 @@
     'sexuados': (['se', 'xua', 'dos'], [(['se', 'xu', 'a', 'dos'], (1, 2))]),
     'sexual': (['se', 'xual'], [(['se', 'xu', 'al'], (1, 2))]),
     'suave': (['sua', 've'], [(['su', 'a', 've'], (0, 1))]),
+    'suntuoso': (['sun', 'tuo', 'so'], [(['sun', 'tu', 'o', 'so'], (2, 3))]),
+    'suntuosa': (['sun', 'tuo', 'sa'], [(['sun', 'tu', 'o', 'sa'], (2, 3))]),
+    'suntuosos': (['sun', 'tuo', 'sos'], [(['sun', 'tu', 'o', 'sos'], (2, 3))]),
+    'suntuosas': (['sun', 'tuo', 'sas'], [(['sun', 'tu', 'o', 'sas'], (2, 3))]),
     'televisual': (['te', 'le', 'vi', 'sual'],
                    [(['te', 'le', 'vi', 'su', 'al'], (3, 4))]),
     'textual': (['tex', 'tual'], [(['tex', 'tu', 'al'], (1, 2))]),
@@ -1451,8 +1455,13 @@
     'viajares': (['via', 'ja', 'res'], [(['vi', 'a', 'ja', 'res'], (0, 1))]),
     'viaje': (['via', 'je'], [(['vi', 'a', 'je'], (0, 1))]),
     'viajes': (['via', 'jes'], [(['vi', 'a', 'jes'], (0, 1))]),
-    'virtual': (['vir', 'tual'], [(['vir', 'tu', 'al'], (2, 2))]),
+    'virtual': (['vir', 'tual'], [(['vir', 'tu', 'al'], (2, 3))]),
+    'virtuoso': (['vir', 'tuo', 'so'], [(['vir', 'tu', 'o', 'so'], (2, 3))]),
+    'virtuosa': (['vir', 'tuo', 'sa'], [(['vir', 'tu', 'o', 'sa'], (2, 3))]),
+    'virtuosos': (['vir', 'tuo', 'sos'], [(['vir', 'tu', 'o', 'sos'], (2, 3))]),
+    'virtuosas': (['vir', 'tuo', 'sas'], [(['vir', 'tu', 'o', 'sas'], (2, 3))]),
     'visual': (['vi', 'sual'], [(['vi', 'su', 'al'], (1, 2))]),
     'visuales': (['vi', 'sua', 'les'], [(['vi', 'su', 'a', 'les'], (1, 2))]),
     'viudo': (['viu', 'do'], [(['vi', 'u', 'do'], (0, 1))]),
+    'ilión': (['i', 'lión'], [(['i', 'li', 'ón'], (0, 1))]),
     'viudos': (['viu', 'dos'], [(['vi', 'u', 'dos'], (0, 1))])}
diff --git a/tests/test_core.py b/tests/test_core.py
index 3c75309..e646c6e 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -21,6 +21,7 @@
 from rantanplan.core import get_syllables_word_end
 from rantanplan.core import get_word_stress
 from rantanplan.core import get_words
+from rantanplan.core import has_single_liaisons
 from rantanplan.core import have_prosodic_liaison
 from rantanplan.core import is_paroxytone
 from rantanplan.core import spacy_tag_to_dict
@@ -1121,3 +1122,15 @@ def test_apply_exception_rules_consonan_w_vowel():
     word = "kiwi"
     output = "ki-wi"
     assert apply_exception_rules(word) == output
+
+
+def test_has_single_liaisons_false():
+    liaisons = [0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0]
+    output = has_single_liaisons(liaisons)
+    assert not output
+
+
+def test_has_single_liaisons_true():
+    liaisons = [0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0]
+    output = has_single_liaisons(liaisons)
+    assert output

From 5c706c3e31e03c5042a7511301635314e48acc8a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=81lvaro=20P=C3=A9rez?= <alvaro.perez.pozo@gmail.com>
Date: Tue, 24 Mar 2020 10:33:52 +0100
Subject: [PATCH 6/6] =?UTF-8?q?Bump=20version:=200.4.2=20=E2=86=92=200.4.3?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 setup.cfg                  | 2 +-
 setup.py                   | 2 +-
 src/rantanplan/__init__.py | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/setup.cfg b/setup.cfg
index 95ca53b..0281be1 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.4.2
+current_version = 0.4.3
 commit = True
 tag = True
 
diff --git a/setup.py b/setup.py
index ac04a19..8ea5d71 100644
--- a/setup.py
+++ b/setup.py
@@ -41,7 +41,7 @@ def read(*names, **kwargs):
 
 setup(
     name='rantanplan',
-    version='0.4.2',
+    version='0.4.3',
     license='Apache Software License 2.0',
     description='Scansion tool for Spanish texts',
     long_description='%s\n%s' % (
diff --git a/src/rantanplan/__init__.py b/src/rantanplan/__init__.py
index 3907fa6..24a1450 100644
--- a/src/rantanplan/__init__.py
+++ b/src/rantanplan/__init__.py
@@ -1,2 +1,2 @@
-__version__ = '0.4.2'
+__version__ = '0.4.3'
 from .core import get_scansion  # noqa