From f0eb914a709681bef4c7031decbfc5a1f1fec4e4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=81lvaro=20P=C3=A9rez?=
Date: Thu, 12 Mar 2020 14:47:10 +0100
Subject: [PATCH] New version with documentation (#69)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Fix/scansion (#62)
* Fixed syllabification exceptions, support for disabling/enabling spacy_affixes
* Fixed multiline break
* Fixed split verb stresses and secondary stress on '-mente' adverbs
* Fixed reviewed issues
* Fixed reviewed issues 2nd wave
* Added minimum length for '-mente' adverbs
* Added 'AUX' to the split_on list for spacy affixes (#64)
* Bump version: 0.4.0 → 0.4.1 (#66)
* Add documentation (#68)
* Added documentation
* Changed imports
* Bump version: 0.4.1 → 0.4.2
* Added usage
* Fixed requirements
* Unified syllabification variables and functions
* Added output example

Co-authored-by: Javier de la Rosa
---
 CHANGELOG.rst                                  |  51 +++
 README.rst                                     | 101 +++++-
 docs/conf.py                                   |   2 +-
 docs/reference/rantanplan.rst                  |   2 +-
 docs/usage.rst                                 |  93 +++++-
 requirements.txt                               |   4 +-
 setup.cfg                                      |   2 +-
 setup.py                                       |   2 +-
 src/rantanplan/__init__.py                     |   2 +-
 src/rantanplan/core.py                         | 297 +++++++-----------
 ..._syllabification.py => syllabification.py}  | 120 +++++++
 11 files changed, 473 insertions(+), 203 deletions(-)
 rename src/rantanplan/{alternative_syllabification.py => syllabification.py} (94%)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 37a7f2c..49ce68f 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -2,6 +2,57 @@ Changelog
 =========
 
+0.4.2 (2020-03-11)
+------------------
+
+* Added documentation
+
+0.4.1 (2019-12-19)
+------------------
+
+* Added 'AUX' to the split_on list for spacy affixes
+* Fixed syllabification exceptions, support for disabling/enabling spacy_affixes
+* Fixed multiline break
+* Fixed split verb stresses and secondary stress on '-mente' adverbs
+* Fixed some issues
+* Added minimum length for '-mente' adverbs
+
+0.4.0 (2019-11-21)
+------------------
+
+* Added SpaCy Doc input support
+* Add umlaut hiatus
+* Added new hiatus and fixed init
+* Refactoring code
+* Feat/new syllabification
+* Naming conventions
+* Adding rhyme analysis to scansion output
+* Adding 'singleton' behaviour to load_pipeline
+* Metre analysis w/ sinaeresis and synalephas
+* Added new workflow for syllabification, with tests
+* Post syllabification rules regexes
+* Added unit tests for all functions
+
+0.3.0 (2019-06-18)
+------------------
+
+* Added SpaCy Doc input support
+* Add umlaut hiatus
+* Fixed syllabification errors, affixes and the pipeline
+* Fixed hyphenator for diphthongs with u umlaut
+* Added hyphenation for explicit hiatus with umlaut vowels
+* Added new hiatus and fixed __init__
+
+0.2.0 (2019-06-14)
+------------------
+
+* Better hyphenator, and affixes and pipeline fixes
+
+0.1.2 (2019-06-10)
+------------------
+
+* Republishing on PyPI
+
 0.1.0 (2019-07-03)
 ------------------
 
diff --git a/README.rst b/README.rst
index dd4c9d5..bf40230 100644
--- a/README.rst
+++ b/README.rst
@@ -39,9 +39,9 @@ Overview
     :alt: PyPI Package latest release
     :target: https://pypi.org/project/rantanplan
 
-.. |commits-since| image:: https://img.shields.io/github/commits-since/linhd-postdata/rantanplan/v0.1.0.svg
+.. 
|commits-since| image:: https://img.shields.io/github/commits-since/linhd-postdata/rantanplan/0.4.2.svg :alt: Commits since latest release - :target: https://github.com/linhd-postdata/rantanplan/compare/v0.1.0...master + :target: https://github.com/linhd-postdata/rantanplan/compare/0.4.2...master .. |wheel| image:: https://img.shields.io/pypi/wheel/rantanplan.svg :alt: PyPI Wheel @@ -69,6 +69,103 @@ Installation pip install rantanplan +Usage +===== + +Install required resources +-------------------------- + +#. Install spaCy model language for Spanish:: + + python -m spacy download es_core_news_md + +#. Install Freeling rules for affixes:: + + python -m spacy_affixes download es + + +Import rantanplan +----------------- + +To use rantanplan in a project:: + + import rantanplan + +Usage example +------------- +.. code-block:: python + + from rantanplan.core import get_scansion + + poem = """Me gustas cuando callas porque estás como ausente, + y me oyes desde lejos, y mi voz no te toca. + Parece que los ojos se te hubieran volado + y parece que un beso te cerrara la boca. + + Como todas las cosas están llenas de mi alma + emerges de las cosas, llena del alma mía. + Mariposa de sueño, te pareces a mi alma, + y te pareces a la palabra melancolía.""" + + get_scansion(poem) + +Output example +-------------- + +.. code-block:: python + + [{'tokens': [{'word': [{'syllable': 'Me', + 'is_stressed': False, + 'is_word_end': True}], + 'stress_position': 0}, + {'word': [{'syllable': 'gus', 'is_stressed': True}, + {'syllable': 'tas', 'is_stressed': False, 'is_word_end': True}], + 'stress_position': -2}, + {'word': [{'syllable': 'cuan', 'is_stressed': False}, + {'syllable': 'do', 'is_stressed': False, 'is_word_end': True}], + 'stress_position': 0}, + {'word': [{'syllable': 'ca', 'is_stressed': True}, + {'syllable': 'llas', 'is_stressed': False, 'is_word_end': True}], + 'stress_position': -2}, + {'word': [{'syllable': 'por', 'is_stressed': False}, + {'syllable': 'que', + 'is_stressed': False, + 'has_synalepha': True, + 'is_word_end': True}], + 'stress_position': 0}, + {'word': [{'syllable': 'es', 'is_stressed': False}, + {'syllable': 'tás', 'is_stressed': True, 'is_word_end': True}], + 'stress_position': -1}, + {'word': [{'syllable': 'co', 'is_stressed': False}, + {'syllable': 'mo', + 'is_stressed': False, + 'has_synalepha': True, + 'is_word_end': True}], + 'stress_position': 0}, + {'word': [{'syllable': 'au', 'is_stressed': False}, + {'syllable': 'sen', 'is_stressed': True}, + {'syllable': 'te', 'is_stressed': False, 'is_word_end': True}], + 'stress_position': -2}, + {'symbol': ','}], + 'phonological_groups': [{'syllable': 'Me', + 'is_stressed': False, + 'is_word_end': True}, + {'syllable': 'gus', 'is_stressed': True}, + {'syllable': 'tas', 'is_stressed': False, 'is_word_end': True}, + {'syllable': 'cuan', 'is_stressed': False}, + {'syllable': 'do', 'is_stressed': False, 'is_word_end': True}, + {'syllable': 'ca', 'is_stressed': True}, + {'syllable': 'llas', 'is_stressed': False, 'is_word_end': True}, + {'syllable': 'por', 'is_stressed': False}, + {'syllable': 'quees', 'is_stressed': False, 'synalepha_index': [2]}, + {'syllable': 'tás', 'is_stressed': True, 'is_word_end': True}, + {'syllable': 'co', 'is_stressed': False}, + {'syllable': 'moau', 'is_stressed': False, 'synalepha_index': [1]}, + {'syllable': 'sen', 'is_stressed': True}, + {'syllable': 'te', 'is_stressed': False, 'is_word_end': True}], + 'rhythm': {'stress': '-+---+---+--+-', 'type': 'pattern', 'length': 14}}, + ... 
+ Documentation ============= diff --git a/docs/conf.py b/docs/conf.py index 43d5926..d26561d 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -26,7 +26,7 @@ year = '2019' author = 'LINHD POSTDATA Project' copyright = '{0}, {1}'.format(year, author) -version = release = '0.1.0' +version = release = '0.4.2' pygments_style = 'trac' templates_path = ['.'] diff --git a/docs/reference/rantanplan.rst b/docs/reference/rantanplan.rst index e63ebb4..f583606 100644 --- a/docs/reference/rantanplan.rst +++ b/docs/reference/rantanplan.rst @@ -5,5 +5,5 @@ rantanplan from rantanplan import * -.. automodule:: rantanplan +.. automodule:: rantanplan.core :members: diff --git a/docs/usage.rst b/docs/usage.rst index 9673afb..f7a6aae 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -1,7 +1,96 @@ -===== Usage ===== +Install required resources +-------------------------- + +#. Install spaCy model language for Spanish:: + + python -m spacy download es_core_news_md + +#. Install Freeling rules for affixes:: + + python -m spacy_affixes download es + + +Import rantanplan +----------------- + To use rantanplan in a project:: - import rantanplan + import rantanplan + +Usage example +------------- +.. code-block:: python + + from rantanplan.core import get_scansion + + poem = """Me gustas cuando callas porque estás como ausente, + y me oyes desde lejos, y mi voz no te toca. + Parece que los ojos se te hubieran volado + y parece que un beso te cerrara la boca. + + Como todas las cosas están llenas de mi alma + emerges de las cosas, llena del alma mía. + Mariposa de sueño, te pareces a mi alma, + y te pareces a la palabra melancolía.""" + + get_scansion(poem) + +Output example +-------------- + +.. code-block:: python + + [{'tokens': [{'word': [{'syllable': 'Me', + 'is_stressed': False, + 'is_word_end': True}], + 'stress_position': 0}, + {'word': [{'syllable': 'gus', 'is_stressed': True}, + {'syllable': 'tas', 'is_stressed': False, 'is_word_end': True}], + 'stress_position': -2}, + {'word': [{'syllable': 'cuan', 'is_stressed': False}, + {'syllable': 'do', 'is_stressed': False, 'is_word_end': True}], + 'stress_position': 0}, + {'word': [{'syllable': 'ca', 'is_stressed': True}, + {'syllable': 'llas', 'is_stressed': False, 'is_word_end': True}], + 'stress_position': -2}, + {'word': [{'syllable': 'por', 'is_stressed': False}, + {'syllable': 'que', + 'is_stressed': False, + 'has_synalepha': True, + 'is_word_end': True}], + 'stress_position': 0}, + {'word': [{'syllable': 'es', 'is_stressed': False}, + {'syllable': 'tás', 'is_stressed': True, 'is_word_end': True}], + 'stress_position': -1}, + {'word': [{'syllable': 'co', 'is_stressed': False}, + {'syllable': 'mo', + 'is_stressed': False, + 'has_synalepha': True, + 'is_word_end': True}], + 'stress_position': 0}, + {'word': [{'syllable': 'au', 'is_stressed': False}, + {'syllable': 'sen', 'is_stressed': True}, + {'syllable': 'te', 'is_stressed': False, 'is_word_end': True}], + 'stress_position': -2}, + {'symbol': ','}], + 'phonological_groups': [{'syllable': 'Me', + 'is_stressed': False, + 'is_word_end': True}, + {'syllable': 'gus', 'is_stressed': True}, + {'syllable': 'tas', 'is_stressed': False, 'is_word_end': True}, + {'syllable': 'cuan', 'is_stressed': False}, + {'syllable': 'do', 'is_stressed': False, 'is_word_end': True}, + {'syllable': 'ca', 'is_stressed': True}, + {'syllable': 'llas', 'is_stressed': False, 'is_word_end': True}, + {'syllable': 'por', 'is_stressed': False}, + {'syllable': 'quees', 'is_stressed': False, 'synalepha_index': [2]}, + {'syllable': 'tás', 
'is_stressed': True, 'is_word_end': True}, + {'syllable': 'co', 'is_stressed': False}, + {'syllable': 'moau', 'is_stressed': False, 'synalepha_index': [1]}, + {'syllable': 'sen', 'is_stressed': True}, + {'syllable': 'te', 'is_stressed': False, 'is_word_end': True}], + 'rhythm': {'stress': '-+---+---+--+-', 'type': 'pattern', 'length': 14}}, + ... diff --git a/requirements.txt b/requirements.txt index ce477bc..64e65ff 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,3 @@ Click>=7.0 -spacy>=2.1 -spacy_affixes \ No newline at end of file +spacy>=2.2 +spacy_affixes diff --git a/setup.cfg b/setup.cfg index fc34ee2..95ca53b 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.4.1 +current_version = 0.4.2 commit = True tag = True diff --git a/setup.py b/setup.py index e865b96..ac04a19 100644 --- a/setup.py +++ b/setup.py @@ -41,7 +41,7 @@ def read(*names, **kwargs): setup( name='rantanplan', - version='0.4.1', + version='0.4.2', license='Apache Software License 2.0', description='Scansion tool for Spanish texts', long_description='%s\n%s' % ( diff --git a/src/rantanplan/__init__.py b/src/rantanplan/__init__.py index 0694a51..3907fa6 100644 --- a/src/rantanplan/__init__.py +++ b/src/rantanplan/__init__.py @@ -1,2 +1,2 @@ -__version__ = '0.4.1' +__version__ = '0.4.2' from .core import get_scansion # noqa diff --git a/src/rantanplan/core.py b/src/rantanplan/core.py index e1c9cd2..becc165 100644 --- a/src/rantanplan/core.py +++ b/src/rantanplan/core.py @@ -13,143 +13,44 @@ from spacy.tokens import Doc -from .alternative_syllabification import ALTERNATIVE_SYLLABIFICATION -from .alternative_syllabification import SYLLABIFICATOR_FOREIGN_WORDS_DICT from .pipeline import load_pipeline from .rhymes import STRUCTURES_LENGTH from .rhymes import analyze_rhyme - -""" -Syllabification -""" -accents_re = re.compile("[áéíóú]", re.I | re.U) -paroxytone_re = re.compile("([aeiou]|n|[aeiou]s)$", - # checks if a str ends in unaccented vowel/N/S - re.I | re.U) - -""" -Regular expressions for spanish syllabification. -For the 'tl' cluster we have decided to join the two letters -because is the most common syllabification and the same that -Perkins (http://sadowsky.cl/perkins.html), DIRAE (https://dirae.es/), -and Educalingo (https://educalingo.com/es/dic-es) use. 
-""" -letter_clusters_re = re.compile(r""" - # 1: weak vowels diphthong with h - ([iuü]h[iuü])| - # 2: open vowels - ([aáeéíoóú]h[iuü])| - # 3: closed vowels - ([iuü]h[aáeéíoóú])| - # 4: liquid and mute consonants (adds hyphen) - ([a-záéíóúñ](?:(?:[bcdfghjklmnñpqstvy][hlr])| - (?:[bcdfghjklmnñpqrstvy][hr])| - (?:[bcdfghjklmnñpqrstvyz][h]))[aáeéiíoóuúü])| - # 5: any char followed by liquid and mute consonant, - # exceptions for 'r+l' and 't+l' - ((?:(?:[bcdfghjklmnñpqstvy][hlr])| - (?:[bcdfghjklmnñpqrstvy][hr])| - (?:[bcdfghjklmnñpqrstvyz][h]))[aáeéiíoóuúü])| - # 6: non-liquid consonant (adds hyphen) - ([a-záéíóúñ][bcdfghjklmnñpqrstvxyz][aáeéiíoóuúüï])| - # 7: vowel group (adds hyphen) - ([aáeéíoóú][aáeéíoóú])| - # 8: umlaut 'u' diphthongs - (ü[iíaeo])| - # 9: Explicit hiatus with umlaut vowels, first part - ([aeiou][äëïöü])| - #10: Explicit hiatus with umlaut vowels, second part - ([üäëïö][a-z])| - #11: any char - ([a-záéíóúñ])""", re.I | re.U | re.VERBOSE) # VERBOSE to catch the group - -""" -Rhythmical Analysis -""" -SPACE = "SPACE" -STRONG_VOWELS = set("aeoáéóÁÉÓAEO") -WEAK_VOWELS = set("iuüíúIÍUÜÚ") -LIAISON_FIRST_PART = set("aeiouáéíóúAEIOUÁÉÍÓÚyY") -LIAISON_SECOND_PART = set("aeiouáéíóúAEIOUÁÉÍÓÚhyYH") - -STRESSED_UNACCENTED_MONOSYLLABLES = {"yo", "vio", "dio", "fe", "sol", "ti", - "un"} - -UNSTRESSED_UNACCENTED_MONOSYLLABLES = {'de', 'el', 'la', 'las', 'le', 'les', - 'lo', 'los', - 'mas', 'me', 'mi', 'nos', 'os', 'que', - 'se', 'si', - 'su', 'tan', 'te', 'tu', "tus", "oh"} - -UNSTRESSED_FORMS = {"ay", "don", "doña", "aun", "que", "cual", "quien", "donde", - "cuando", "cuanto", "como", "cuantas", "cuantos"} - -STRESSED_PRON = {"mío", "mía", "míos", "mías", "tuyo", "tuya", "tuyos", - "tuyas", "suyo", "suya", "suyos", "suyas", "todo"} - -POSSESSIVE_PRON_UNSTRESSED = {"nuestro", "nuestra", "nuestros", "nuestras", - "vuestro", "vuestra", "vuestros", "vuestras"} - -""" -Regular expressions and rules for syllabification exceptions -""" - -# Words starting with prefixes SIN-/DES- followed by consonant "destituir" -PREFIX_DES_WITH_CONSONANT_RE = ( - re.compile("^(des)([bcdfgjklmhnñpqrstvxyz].*)", re.I | re.U)) - -# Words starting with prefixes SIN-/DES- followed by consonant "sinhueso" -PREFIX_SIN_WITH_CONSONANT_RE = ( - re.compile("^(sin)([bcdfgjklmhnñpqrstvxyz].*)", re.I | re.U)) - -# Group consonant+[hlr] with exceptions for ll -CONSONANT_GROUP = (re.compile("(.*[hmnqsw])([hlr][aeiouáéíóú].*)", re.I | re.U)) -CONSONANT_GROUP_EXCEPTION_LL = ( - re.compile("(.*[hlmnqsw])([hr][aeiouáéíóú].*)", re.I | re.U)) -CONSONANT_GROUP_EXCEPTION_DL = ( - re.compile("(.*[d])([l][aeiouáéíóú].*)", re.I | re.U)) - -# Group vowel+ w + vowel -W_VOWEL_GROUP = (re.compile("(.*[aeiouáéíóú])(w[aeiouáéíóú].*)", re.I | re.U)) - -# Post-syllabification exceptions for consonant clusters and diphthongs -# Explicitit hiatus on first vowel -HIATUS_FIRST_VOWEL_RE = (re.compile( - "(?:(.*-)|^)([äëïö]|[^g]ü)([aeiouúáéíó].*)", - re.I | re.U | re.VERBOSE)) - -# Consonant cluster. Example: 'cneorácea' -CONSONANT_CLUSTER_RE = (re.compile( - "(?:(.*-)|^)([mpgc])-([bcdfghjklmñnpqrstvwxyz][aeioáéíó].*)", - re.I | re.U | re.VERBOSE)) - -# Lowering diphthong. Example: 'ahijador' -LOWERING_DIPHTHONGS_WITH_H = ( - re.compile( - """((?:.*-|^)(?:qu|[bcdfghjklmñnpqrstvwxyz]+)?) - ([aeo])-(h[iu](?![aeoiuíúáéó]).*)""", - re.I | re.U | re.VERBOSE)) - -# Lowering diphthong. Example: 'buhitiho' -RAISING_DIPHTHONGS_WITH_H = ( - re.compile( - """((?:.*-|^)(?:qu|[bcdfghjklmñnpqrstvwxyz]+)?) 
- ([iu])-(h[aeiouáéó](?![aeoáéiuíú]).*)""", - re.I | re.U | re.VERBOSE)) - -""" -Rhythmical Analysis functions -""" +from .syllabification import ALTERNATIVE_SYLLABIFICATION +from .syllabification import CONSONANT_CLUSTER_RE +from .syllabification import CONSONANT_GROUP +from .syllabification import CONSONANT_GROUP_EXCEPTION_DL +from .syllabification import CONSONANT_GROUP_EXCEPTION_LL +from .syllabification import HIATUS_FIRST_VOWEL_RE +from .syllabification import LIAISON_FIRST_PART +from .syllabification import LIAISON_SECOND_PART +from .syllabification import LOWERING_DIPHTHONGS_WITH_H +from .syllabification import POSSESSIVE_PRON_UNSTRESSED +from .syllabification import PREFIX_DES_WITH_CONSONANT_RE +from .syllabification import PREFIX_SIN_WITH_CONSONANT_RE +from .syllabification import RAISING_DIPHTHONGS_WITH_H +from .syllabification import SPACE +from .syllabification import STRESSED_PRON +from .syllabification import STRESSED_UNACCENTED_MONOSYLLABLES +from .syllabification import STRONG_VOWELS +from .syllabification import SYLLABIFICATOR_FOREIGN_WORDS_DICT +from .syllabification import UNSTRESSED_FORMS +from .syllabification import UNSTRESSED_UNACCENTED_MONOSYLLABLES +from .syllabification import W_VOWEL_GROUP +from .syllabification import WEAK_VOWELS +from .syllabification import accents_re +from .syllabification import letter_clusters_re +from .syllabification import paroxytone_re def have_prosodic_liaison(first_syllable, second_syllable): - """ - Checkfor prosodic liaison between two syllables - :param first_syllable: dic with key syllable (str) and is_stressed (bool) - representing the first syllable - :param second_syllable: dic with key syllable (str) and is_stressed (bool) - representing the second syllable - :return: True if there is prosodic liaison and False otherwise + """Checks for prosodic liaison between two syllables + + :param first_syllable: Dictionary with key syllable (str) and is_stressed (bool) representing + the first syllable + :param second_syllable: Dictionary with key syllable (str) and is_stressed (bool) + representing the second syllable + :return: `True` if there is prosodic liaison and `False` otherwise :rtype: bool """ if second_syllable['syllable'][0].lower() == 'y' and ( @@ -162,10 +63,11 @@ def have_prosodic_liaison(first_syllable, second_syllable): def get_syllables_word_end(words): - """ - Get a list of syllables from a list of words extracting word boundaries + """Get a list of syllables from a list of words extracting word boundaries + :param words: List of dictonaries of syllables for each word in a line :return: List of dictionaries of syllables with an extra is_word_end key + :rtype: list """ syllables = [] for word in words: @@ -180,17 +82,18 @@ def get_syllables_word_end(words): def get_phonological_groups(word_syllables, liaison_type="synalepha", breakage_func=None, liaison_positions=None): - """ - Get a list of dictionaries for each phonological group on a line + """Get a list of dictionaries for each phonological group on a line and joins the syllables to create phonological groups (pronounced together) according to a type of liaison, either synaloepha or sinaeresis + :param word_syllables: List of dictionaries for each word of the line :param liaison_type: Which liaison is going to be performed synalepha or - sinaeresis + sinaeresis :param breakage_func: Function to decide when not to break a liaison that is - specified in liaison_positions + specified in liaison_positions :param liaison_positions: Positions of the liaisons :return: A 
list of conjoined syllables + :rtype: list """ syllables = word_syllables[:] liaison_property = f"has_{liaison_type}" @@ -240,13 +143,14 @@ def get_phonological_groups(word_syllables, liaison_type="synalepha", def clean_phonological_groups(groups, liaison_positions, liaison_property): - """ - Clean phonological groups so their liaison property is consistently set + """Clean phonological groups so their liaison property is consistently set according to the the liaison positions + :param groups: Phonological groups to be cleaned :param liaison_positions: Positions of the liaisons :param liaison_property: The liaison type (synaeresis or synalepha) - :return: + :return: Cleaned phonological groups + :rtype: dict """ clean_groups = [] for idx, group in enumerate(groups): @@ -260,12 +164,13 @@ def clean_phonological_groups(groups, liaison_positions, liaison_property): def get_rhythmical_pattern(phonological_groups, rhythm_format="pattern"): - """ - Gets a rhythm pattern for a poem in either "pattern": "-++-+-+-" + """Gets a rhythm pattern for a poem in either "pattern": "-++-+-+-" "binary": "01101010" or "indexed": [1,2,4,6] format + :param phonological_groups: a dictionary with the syllables of the line :param rhythm_format: The output format for the rhythm :return: Dictionary with with rhythm and phonologic groups + :rtype: dict """ stresses = get_stresses(phonological_groups) stress = format_stress(stresses, rhythm_format) @@ -277,15 +182,16 @@ def get_rhythmical_pattern(phonological_groups, rhythm_format="pattern"): def get_stresses(phonological_groups): - """ - Gets a list of stress marks (True for stressed, False for unstressed) from a - list of phonological groups applying rules depending on the ending stress. + """Gets a list of stress marks (`True` for stressed, `False` for unstressed) + from a list of phonological groups applying rules depending on the ending + stress. 
+ :param phonological_groups: a dictionary with the phonological groups - (syllables) of the line + (syllables) of the line :return: List of boolean values indicating whether a group is - stressed (True) or not (False) + stressed (`True`) or not (`False`) + :rtype: list """ - # stresses = [group["is_stressed"] for group in phonological_groups] stresses = [] last_word_syllables = [] for group in phonological_groups: @@ -312,16 +218,17 @@ def get_stresses(phonological_groups): def format_stress(stresses, rhythm_format="pattern", indexed_separator="-"): - """ - Converts a list of boolean elements into a string that matches the chosen - rhythm format: - "indexed": 2,5,8 - "pattern": -++--+-+- - "binary": 01101001 + """Converts a list of boolean elements into a string that matches the chosen + rhythm format: + "indexed": 2,5,8 + "pattern": -++--+-+- + "binary": 01101001 + :param stresses: List of boolean elements representing stressed syllables :param rhythm_format: Format to be used: indexed, pattern, or binary :param indexed_separator: String to use as a separator for indexed pattern :return: String with the stress pattern + :rtype: str """ separator = "" if rhythm_format == 'indexed': @@ -342,11 +249,12 @@ def format_stress(stresses, rhythm_format="pattern", indexed_separator="-"): def apply_exception_rules(word): - """ - Applies presyllabification rules to a word, + """Applies presyllabification rules to a word, based on Antonio Ríos Mestre's work + :param word: A string to be checked for exceptions :return: A string with the presyllabified word + :rtype: str """ # Vowel + w + vowel group if W_VOWEL_GROUP.match(word): @@ -380,11 +288,12 @@ def apply_exception_rules(word): def apply_exception_rules_post(word): - """ - Applies presyllabification rules to a word, + """Applies presyllabification rules to a word, based on Antonio Ríos Mestre's work + :param word: A string to be checked for exceptions :return: A string with the presyllabified word with hyphens + :rtype: str """ # We make one pass for every match found so we can perform # several substitutions @@ -403,12 +312,12 @@ def apply_exception_rules_post(word): def syllabify(word, alternative_syllabification=False): - """ - Syllabifies a word. + """Syllabifies a word. + :param word: The word to be syllabified. :param alternative_syllabification: Wether or not the alternative - syllabification is used - :return: list of syllables and exceptions where appropriate. + syllabification is used + :return: List of syllables and exceptions where appropriate. 
:rtype: list """ output = "" @@ -438,10 +347,10 @@ def syllabify(word, alternative_syllabification=False): def get_orthographic_accent(syllable_list): - """ - Given a list of str representing syllables, + """Given a list of str representing syllables, return position in the list of a syllable bearing orthographic stress (with the acute accent mark in Spanish) + :param syllable_list: list of syllables as str or unicode each :return: Position or None if no orthographic stress :rtype: int @@ -456,11 +365,11 @@ def get_orthographic_accent(syllable_list): def is_paroxytone(syllables): - """ - Given a list of str representing syllables from a single word, + """Given a list of str representing syllables from a single word, check if it is paroxytonic (llana) or not + :param syllables: List of syllables as str - :return: True if paroxytone, False if not + :return: `True` if paroxytone, `False` if not :rtype: bool """ if not get_orthographic_accent("".join(syllables)): @@ -469,12 +378,13 @@ def is_paroxytone(syllables): def spacy_tag_to_dict(tag): - """ - Creater a dict from spacy pos tags + """Creates a dict from spacy pos tags + :param tag: Extended spacy pos tag - ("Definite=Ind|Gender=Masc|Number=Sing|PronType=Art") + ("Definite=Ind|Gender=Masc|Number=Sing|PronType=Art") :return: A dictionary in the form of - "{'Definite': 'Ind', 'Gender': 'Masc', 'Number': 'Sing', 'PronType': 'Art'}" + "{'Definite': 'Ind', 'Gender': 'Masc', 'Number': 'Sing', + 'PronType': 'Art'}" :rtype: dict """ if tag and '=' in tag: @@ -484,18 +394,17 @@ def spacy_tag_to_dict(tag): def get_word_stress(word, pos, tag, alternative_syllabification=False): - """ - Gets a list of syllables from a word and creates a list with syllabified + """Gets a list of syllables from a word and creates a list with syllabified word and stressed syllable index + :param word: Word string :param alternative_syllabification: Wether or not the alternative - syllabification is used + syllabification is used :param pos: PoS tag from spacy ("DET") :param tag: Extended PoS tag info from spacy - ("Definite=Ind|Gender=Masc|Number=Sing|PronType=Art") + ("Definite=Ind|Gender=Masc|Number=Sing|PronType=Art") :return: Dict with [original syllab word, stressed syllabified word, - negative index position of stressed syllable or 0 - if not stressed] + negative index position of stressed syllable or 0 if not stressed] :rtype: dict """ syllable_list, _ = syllabify(word, alternative_syllabification) @@ -593,10 +502,11 @@ def get_word_stress(word, pos, tag, alternative_syllabification=False): def get_last_syllable(token_list): - """ - Gets last syllable from a word in a dictionary + """Gets last syllable from a word in a dictionary + :param token_list: list of dictionaries with line tokens :return: Last syllable + :rtype: str """ if len(token_list) > 0: for token in token_list[::-1]: @@ -605,14 +515,14 @@ def get_last_syllable(token_list): def get_words(word_list, alternative_syllabification=False): - """ - Gets a list of syllables from a word and creates a list with syllabified + """Gets a list of syllables from a word and creates a list with syllabified word and stressed syllabe index + :param word_list: List of spacy objects representing a word or sentence :param alternative_syllabification: Wether or not the alternative - syllabification is used + syllabification is used :return: List with [original syllab. word, stressed syllab. 
word, negative - index position of stressed syllable] + index position of stressed syllable] :rtype: list """ syllabified_words = [] @@ -647,10 +557,11 @@ def get_words(word_list, alternative_syllabification=False): def join_affixes(line): - """ - Join affixes of split words and recalculates stress + """Join affixes of split words and recalculates stress + :param line: List of syllabified words (dict) :return: List of syllabified words (dict) with joined affixes + :rtype: list """ syllabified_words = [] indices_to_ignore = [] @@ -675,13 +586,13 @@ def join_affixes(line): def get_scansion(text, rhyme_analysis=False, rhythm_format="pattern", rhythmical_lengths=None): - """ - Generates a list of dictionaries for each line + """Generates a list of dictionaries for each line + :param text: Full text to be analyzed :param rhyme_analysis: Specify if rhyme analysis is to be performed :param rhythm_format: output format for rhythm analysis :param rhythmical_lengths: List with explicit rhythmical lengths per line - that the analysed lines has to meet + that the analysed lines has to meet :return: list of dictionaries per line :rtype: list """ @@ -760,10 +671,11 @@ def break_on_h(liaison_type, syllable_left, syllable_right): def generate_phonological_groups(tokens): - """ - Generates phonological groups from a list of tokens + """Generates phonological groups from a list of tokens + :param tokens: list of spaCy tokens :return: Generator with a list of phonological groups + :rtype: generator """ for alternative_syllabification in (True, False): words = get_words(tokens, alternative_syllabification) @@ -799,11 +711,12 @@ def generate_phonological_groups(tokens): def generate_liaison_positions(syllables, liaison): - """ - Generates all possible combinations for the liaisons on a list of syllables + """Generates all possible combinations for the liaisons on a list of syllables + :param syllables: List of syllables with :param liaison: Type of liaison combination to be generated :return: Generator with a list of possible combinations + :rtype: generator """ positions = [int(syllable.get(f"has_{liaison}", 0)) for syllable in syllables] diff --git a/src/rantanplan/alternative_syllabification.py b/src/rantanplan/syllabification.py similarity index 94% rename from src/rantanplan/alternative_syllabification.py rename to src/rantanplan/syllabification.py index f1d0bde..38a3281 100644 --- a/src/rantanplan/alternative_syllabification.py +++ b/src/rantanplan/syllabification.py @@ -1,3 +1,123 @@ +import re + +""" +Syllabification +""" +accents_re = re.compile("[áéíóú]", re.I | re.U) +paroxytone_re = re.compile("([aeiou]|n|[aeiou]s)$", + # checks if a str ends in unaccented vowel/N/S + re.I | re.U) + +""" +Regular expressions for spanish syllabification. +For the 'tl' cluster we have decided to join the two letters +because is the most common syllabification and the same that +Perkins (http://sadowsky.cl/perkins.html), DIRAE (https://dirae.es/), +and Educalingo (https://educalingo.com/es/dic-es) use. 
+""" +letter_clusters_re = re.compile(r""" + # 1: weak vowels diphthong with h + ([iuü]h[iuü])| + # 2: open vowels + ([aáeéíoóú]h[iuü])| + # 3: closed vowels + ([iuü]h[aáeéíoóú])| + # 4: liquid and mute consonants (adds hyphen) + ([a-záéíóúñ](?:(?:[bcdfghjklmnñpqstvy][hlr])| + (?:[bcdfghjklmnñpqrstvy][hr])| + (?:[bcdfghjklmnñpqrstvyz][h]))[aáeéiíoóuúü])| + # 5: any char followed by liquid and mute consonant, + # exceptions for 'r+l' and 't+l' + ((?:(?:[bcdfghjklmnñpqstvy][hlr])| + (?:[bcdfghjklmnñpqrstvy][hr])| + (?:[bcdfghjklmnñpqrstvyz][h]))[aáeéiíoóuúü])| + # 6: non-liquid consonant (adds hyphen) + ([a-záéíóúñ][bcdfghjklmnñpqrstvxyz][aáeéiíoóuúüï])| + # 7: vowel group (adds hyphen) + ([aáeéíoóú][aáeéíoóú])| + # 8: umlaut 'u' diphthongs + (ü[iíaeo])| + # 9: Explicit hiatus with umlaut vowels, first part + ([aeiou][äëïöü])| + #10: Explicit hiatus with umlaut vowels, second part + ([üäëïö][a-z])| + #11: any char + ([a-záéíóúñ])""", re.I | re.U | re.VERBOSE) # VERBOSE to catch the group + +""" +Rhythmical Analysis +""" +SPACE = "SPACE" +STRONG_VOWELS = set("aeoáéóÁÉÓAEO") +WEAK_VOWELS = set("iuüíúIÍUÜÚ") +LIAISON_FIRST_PART = set("aeiouáéíóúAEIOUÁÉÍÓÚyY") +LIAISON_SECOND_PART = set("aeiouáéíóúAEIOUÁÉÍÓÚhyYH") + +STRESSED_UNACCENTED_MONOSYLLABLES = {"yo", "vio", "dio", "fe", "sol", "ti", + "un"} + +UNSTRESSED_UNACCENTED_MONOSYLLABLES = {'de', 'el', 'la', 'las', 'le', 'les', + 'lo', 'los', + 'mas', 'me', 'mi', 'nos', 'os', 'que', + 'se', 'si', + 'su', 'tan', 'te', 'tu', "tus", "oh"} + +UNSTRESSED_FORMS = {"ay", "don", "doña", "aun", "que", "cual", "quien", "donde", + "cuando", "cuanto", "como", "cuantas", "cuantos"} + +STRESSED_PRON = {"mío", "mía", "míos", "mías", "tuyo", "tuya", "tuyos", + "tuyas", "suyo", "suya", "suyos", "suyas", "todo"} + +POSSESSIVE_PRON_UNSTRESSED = {"nuestro", "nuestra", "nuestros", "nuestras", + "vuestro", "vuestra", "vuestros", "vuestras"} + +""" +Regular expressions and rules for syllabification exceptions +""" + +# Words starting with prefixes SIN-/DES- followed by consonant "destituir" +PREFIX_DES_WITH_CONSONANT_RE = ( + re.compile("^(des)([bcdfgjklmhnñpqrstvxyz].*)", re.I | re.U)) + +# Words starting with prefixes SIN-/DES- followed by consonant "sinhueso" +PREFIX_SIN_WITH_CONSONANT_RE = ( + re.compile("^(sin)([bcdfgjklmhnñpqrstvxyz].*)", re.I | re.U)) + +# Group consonant+[hlr] with exceptions for ll +CONSONANT_GROUP = (re.compile("(.*[hmnqsw])([hlr][aeiouáéíóú].*)", re.I | re.U)) +CONSONANT_GROUP_EXCEPTION_LL = ( + re.compile("(.*[hlmnqsw])([hr][aeiouáéíóú].*)", re.I | re.U)) +CONSONANT_GROUP_EXCEPTION_DL = ( + re.compile("(.*[d])([l][aeiouáéíóú].*)", re.I | re.U)) + +# Group vowel+ w + vowel +W_VOWEL_GROUP = (re.compile("(.*[aeiouáéíóú])(w[aeiouáéíóú].*)", re.I | re.U)) + +# Post-syllabification exceptions for consonant clusters and diphthongs +# Explicitit hiatus on first vowel +HIATUS_FIRST_VOWEL_RE = (re.compile( + "(?:(.*-)|^)([äëïö]|[^g]ü)([aeiouúáéíó].*)", + re.I | re.U | re.VERBOSE)) + +# Consonant cluster. Example: 'cneorácea' +CONSONANT_CLUSTER_RE = (re.compile( + "(?:(.*-)|^)([mpgc])-([bcdfghjklmñnpqrstvwxyz][aeioáéíó].*)", + re.I | re.U | re.VERBOSE)) + +# Lowering diphthong. Example: 'ahijador' +LOWERING_DIPHTHONGS_WITH_H = ( + re.compile( + """((?:.*-|^)(?:qu|[bcdfghjklmñnpqrstvwxyz]+)?) + ([aeo])-(h[iu](?![aeoiuíúáéó]).*)""", + re.I | re.U | re.VERBOSE)) + +# Lowering diphthong. Example: 'buhitiho' +RAISING_DIPHTHONGS_WITH_H = ( + re.compile( + """((?:.*-|^)(?:qu|[bcdfghjklmñnpqrstvwxyz]+)?) 
+        ([iu])-(h[aeiouáéó](?![aeoáéiuíú]).*)""",
+        re.I | re.U | re.VERBOSE))
+
 """
 Exceptions for foreign words in Spanish that do not follow standard Spanish
 syllabification rules
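For reference, a minimal sketch of calling the entry point shipped in this release, assuming rantanplan 0.4.2 is installed together with the ``es_core_news_md`` model and the Freeling affix rules from the README above. ``rhyme_analysis`` and ``rhythm_format`` are the optional arguments documented in the ``get_scansion`` docstring; the exact shape of the returned dictionaries is shown in the output example.

.. code-block:: python

    from rantanplan.core import get_scansion

    poem = """Me gustas cuando callas porque estás como ausente,
    y me oyes desde lejos, y mi voz no te toca."""

    # rhythm_format accepts "pattern" (default), "binary" or "indexed";
    # rhyme_analysis adds rhyme information to the output when enabled.
    scansion = get_scansion(poem, rhyme_analysis=True, rhythm_format="binary")

    # Each element of the list describes one line of the poem.
    for line in scansion:
        print(line["rhythm"])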