From a41abe8b2520ce73cd799589b6121629614e13d8 Mon Sep 17 00:00:00 2001
From: Joris Van Looveren
Date: Mon, 15 Oct 2018 11:17:59 +0200
Subject: [PATCH 1/3] Automatically add rules for all terminals to specific,
 annotated rules.

---
 grammar/main.py                               |  6 ++
 grammar/parse.py                              | 72 +++++++++++++++----
 tests/testcases.txt                           |  1 +
 tests/testcases_expected_linux.txt            |  1 +
 tests/testcases_expected_mac.txt              |  1 +
 ...stcases_expected_windows_belgiankeymap.txt |  1 +
 ...cases_expected_windows_englishuskeymap.txt |  2 +
 7 files changed, 72 insertions(+), 12 deletions(-)

diff --git a/grammar/main.py b/grammar/main.py
index b4b6b13..2ed3f08 100644
--- a/grammar/main.py
+++ b/grammar/main.py
@@ -16,8 +16,14 @@
 else:
     f = sys.stdin
 
+# The parser is instantiated twice: once to allow
+# the collection of terminals from the instantiated
+# parser (in find_keywords), and then again, augmented
+# with additional rules, created automatically from the
+# set of terminals.
 parser = SingleInputParser()
 find_keywords(parser) # init lexer
+parser = SingleInputParser()
 
 while True:
     line = f.readline()
diff --git a/grammar/parse.py b/grammar/parse.py
index 5e2049f..58f82d2 100644
--- a/grammar/parse.py
+++ b/grammar/parse.py
@@ -3,6 +3,7 @@
 from spark import GenericParser
 from spark import GenericASTBuilder
 from ast import AST
+import scan
 
 class GrammaticalError(Exception):
     def __init__(self, string):
@@ -11,9 +12,64 @@ def __str__(self):
         return self.string
 
 class CoreParser(GenericParser):
     def __init__(self, start):
+        # check if we have to add terminal rules, and
+        # do so if the list of terminals is known
+        self.install_terminal_rules()
+        # initialize and set up the grammar rules
         GenericParser.__init__(self, start)
 
+    # In our grammar, the token type ANY does not match any of the other
+    # token types. In some cases, this is not the desired behavior, e.g. for
+    # "word <ANY>" you want <ANY> to be able to be "five" or "sentence" or
+    # any other word that may have been used as a terminal in the grammar.
+    # This becomes more of an issue as you add macros, and more words become
+    # reserved.
+    # We can work around this limitation by adding rules for terminals
+    # that we want to allow; however, with many terminals this will
+    # quickly become infeasible.
+    # The function and function decorator below work together to automate this.
+    # (The decorator is needed to modify the docstring programmatically.)
+    # We rely on the fact that in main.py, we already collect a list of
+    # terminals (using find_terminals()). This does mean, however, that we
+    # have to instantiate the parser twice: first in "basic" form, which is
+    # used to collect the terminals, and then again in "decorated" form, where
+    # we automatically add the desired terminal rules.
+
+    def install_terminal_rules(self):
+        # if we have a list of terminals available: walk all rules, and see
+        # if they were annotated with @add_rules_for_terminals. If so, we add
+        # new rules based on the template for that rule and the terminals.
+        try:
+            if scan.keywords is not None:
+                for item in CoreParser.__dict__:
+                    if item.startswith("p_"):
+                        function = CoreParser.__dict__[item]
+                        try:
+                            # this will trigger an AttributeError
+                            # for functions that were not annotated:
+                            template = function._rule_template
+                            for kw in scan.keywords:
+                                function.__doc__ += \
+                                    (template.format(kw) + "\n")
+                        except AttributeError:
+                            pass
+        except AttributeError:
+            pass
+
+    # function decorator: adding @add_rules_for_terminals("")
+    # before a function declaration will add the given rule template
+    # as a new attribute to the function.
+    # This is used to signal that for this function, we have to add a new rule
+    # for each terminal, so that the terminal can be used in the spoken text.
+    def add_rules_for_terminals(rule_template):
+        def add_attrs(func):
+            func._rule_template = rule_template
+            return func
+        return add_attrs
+
     def typestring(self, token):
         return token.type
@@ -347,10 +403,13 @@ def p_modifiers(self, args):
         else:
             return AST('mod_plus_key', [ value[args[0].type] ], [ args[1] ] )
 
+    @add_rules_for_terminals("english ::= word {}")
     def p_english(self, args):
         '''
             english ::= word ANY
         '''
+        if args[1].type != 'ANY':
+            return AST('sequence', [ args[1].type ])
         return AST('sequence', [ args[1].extra ])
 
     def p_word_sentence(self, args):
@@ -379,21 +438,10 @@ def p_word_repeat(self, args):
         args[1].children.insert(0, AST('null', args[0]))
         return args[1]
 
+    @add_rules_for_terminals("raw_word ::= {}")
     def p_raw_word(self, args):
         '''
             raw_word ::= ANY
-            raw_word ::= zero
-            raw_word ::= one
-            raw_word ::= two
-            raw_word ::= three
-            raw_word ::= four
-            raw_word ::= five
-            raw_word ::= six
-            raw_word ::= seven
-            raw_word ::= eight
-            raw_word ::= nine
-            raw_word ::= to
-            raw_word ::= for
         '''
         if(args[0].type == 'ANY'):
             return args[0].extra
diff --git a/tests/testcases.txt b/tests/testcases.txt
index 89853f6..360df9f 100644
--- a/tests/testcases.txt
+++ b/tests/testcases.txt
@@ -42,3 +42,4 @@ control space
 control left
 number twenty five
 number four hundred two thousand eight hundred fifteen
+phrase window sentence phrase
diff --git a/tests/testcases_expected_linux.txt b/tests/testcases_expected_linux.txt
index 4584b73..c9cfaee 100644
--- a/tests/testcases_expected_linux.txt
+++ b/tests/testcases_expected_linux.txt
@@ -42,3 +42,4 @@
 `/usr/bin/xdotool key ctrl+Left`
 `/usr/bin/xdotool key 2 key 5`
 `/usr/bin/xdotool key 4 key 0 key 2 key 8 key 1 key 5`
+`/usr/bin/xdotool key w key i key n key d key o key w key space key s key e key n key t key e key n key c key e key space key p key h key r key a key s key e`
diff --git a/tests/testcases_expected_mac.txt b/tests/testcases_expected_mac.txt
index c423248..3ce92f3 100644
--- a/tests/testcases_expected_mac.txt
+++ b/tests/testcases_expected_mac.txt
@@ -42,3 +42,4 @@
 `cliclick w:10 kd:ctrl kp:arrow-left ku:ctrl`
 `cliclick t:2 t:5`
 `cliclick t:4 t:0 t:2 t:8 t:1 t:5`
+`cliclick t:w t:i t:n t:d t:o t:w kp:space t:s t:e t:n t:t t:e t:n t:c t:e kp:space t:p t:h t:r t:a t:s t:e`
diff --git a/tests/testcases_expected_windows_belgiankeymap.txt b/tests/testcases_expected_windows_belgiankeymap.txt
index 4f57000..2c0411e 100644
--- a/tests/testcases_expected_windows_belgiankeymap.txt
+++ b/tests/testcases_expected_windows_belgiankeymap.txt
@@ -42,3 +42,4 @@
 `C:\Tools\nircmd-x64\nircmd.exe sendkeypress ctrl+left`
 `C:\Tools\nircmd-x64\nircmd.exe sendkeypress 2 5`
 `C:\Tools\nircmd-x64\nircmd.exe sendkeypress 4 0 2 8 1 5`
+`C:\Tools\nircmd-x64\nircmd.exe sendkeypress w i n d o w spc s e n t e n c e spc p h r a s e`
diff --git a/tests/testcases_expected_windows_englishuskeymap.txt b/tests/testcases_expected_windows_englishuskeymap.txt
index 59a163e..a56c660 100644
--- a/tests/testcases_expected_windows_englishuskeymap.txt
+++ b/tests/testcases_expected_windows_englishuskeymap.txt
@@ -42,3 +42,5 @@
 `C:\Tools\nircmd-x64\nircmd.exe sendkeypress ctrl+left`
 `C:\Tools\nircmd-x64\nircmd.exe sendkeypress 2 5`
 `C:\Tools\nircmd-x64\nircmd.exe sendkeypress 4 0 2 8 1 5`
+`C:\Tools\nircmd-x64\nircmd.exe sendkeypress w i n d o w spc s e n t e n c e spc p h r a s e`
+
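A note on the technique in PATCH 1/3: SPARK-style parsers such as GenericParser read their grammar from the docstrings of the p_* rule methods, so new productions can be injected by appending lines to __doc__ before the parser is initialized; function docstrings are writable, which is what makes this work. Below is a minimal, self-contained sketch of the decorator-plus-installer idea; the ToyParser class and the two sample terminals are invented for this note and are not code from the patch:

    def add_rules_for_terminals(rule_template):
        # mark a rule method with a template such as "raw_word ::= {}"
        def add_attrs(func):
            func._rule_template = rule_template
            return func
        return add_attrs

    class ToyParser(object):
        @add_rules_for_terminals("raw_word ::= {}")
        def p_raw_word(self, args):
            '''
                raw_word ::= ANY
            '''

    def install_terminal_rules(cls, terminals):
        # append one production per terminal to every annotated rule method
        for name in list(cls.__dict__):
            func = cls.__dict__[name]
            template = getattr(func, '_rule_template', None)
            if template is not None:
                for kw in terminals:
                    func.__doc__ += template.format(kw) + "\n"

    install_terminal_rules(ToyParser, ['five', 'sentence'])
    print(ToyParser.p_raw_word.__doc__)
    # the docstring now also contains:
    #     raw_word ::= five
    #     raw_word ::= sentence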
From 67d0b8461f8d8704d315f7bbb055da5d6f1a083b Mon Sep 17 00:00:00 2001
From: Joris Van Looveren
Date: Wed, 17 Oct 2018 22:28:58 +0200
Subject: [PATCH 2/3] Add 'exclusions' list to add_rules_for_terminals
 function decorator

---
 grammar/parse.py                                 | 16 +++++++++++++---
 tests/testcases.txt                              |  1 +
 tests/testcases_expected_linux.txt               |  1 +
 tests/testcases_expected_mac.txt                 |  1 +
 .../testcases_expected_windows_belgiankeymap.txt |  1 +
 ...estcases_expected_windows_englishuskeymap.txt |  2 +-
 6 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/grammar/parse.py b/grammar/parse.py
index 58f82d2..4e574c5 100644
--- a/grammar/parse.py
+++ b/grammar/parse.py
@@ -50,7 +50,8 @@ def install_terminal_rules(self):
                             # this will trigger an AttributeError
                             # for functions that were not annotated:
                             template = function._rule_template
-                            for kw in scan.keywords:
+                            exclusions = function._exclusions
+                            for kw in set(scan.keywords) - set(exclusions):
                                 function.__doc__ += \
                                     (template.format(kw) + "\n")
                         except AttributeError:
@@ -63,9 +64,10 @@
     # as a new attribute to the function.
     # This is used to signal that for this function, we have to add a new rule
     # for each terminal, so that the terminal can be used in the spoken text.
-    def add_rules_for_terminals(rule_template):
+    def add_rules_for_terminals(rule_template, exclusions=[]):
         def add_attrs(func):
             func._rule_template = rule_template
+            func._exclusions = exclusions
             return func
         return add_attrs
 
@@ -438,7 +440,15 @@ def p_word_repeat(self, args):
         args[1].children.insert(0, AST('null', args[0]))
         return args[1]
 
-    @add_rules_for_terminals("raw_word ::= {}")
+    # 'exclusions' contains the terminals that should continue to be
+    # treated as commands. As it is, the list is somewhat arbitrary;
+    # it contains modifier keys and a subset of the special characters from
+    # the "p_character" rule. Modify as desired.
+    @add_rules_for_terminals("raw_word ::= {}", exclusions = \
+        ['control', 'alt', 'alternative',
+         'colon', 'semicolon', 'bang', 'hash', 'percent',
+         'ampersand', 'star', 'minus', 'underscore', 'plus',
+         'backslash', 'question', 'comma'])
     def p_raw_word(self, args):
         '''
             raw_word ::= ANY
diff --git a/tests/testcases.txt b/tests/testcases.txt
index 360df9f..3f9e767 100644
--- a/tests/testcases.txt
+++ b/tests/testcases.txt
@@ -43,3 +43,4 @@ control left
 number twenty five
 number four hundred two thousand eight hundred fifteen
 phrase window sentence phrase
+sentence hello there comma space phrase how are you question
diff --git a/tests/testcases_expected_linux.txt b/tests/testcases_expected_linux.txt
index c9cfaee..24f1b0b 100644
--- a/tests/testcases_expected_linux.txt
+++ b/tests/testcases_expected_linux.txt
@@ -43,3 +43,4 @@
 `/usr/bin/xdotool key 2 key 5`
 `/usr/bin/xdotool key 4 key 0 key 2 key 8 key 1 key 5`
 `/usr/bin/xdotool key w key i key n key d key o key w key space key s key e key n key t key e key n key c key e key space key p key h key r key a key s key e`
+`/usr/bin/xdotool key H key e key l key l key o key space key t key h key e key r key e key comma key space key h key o key w key space key a key r key e key space key y key o key u key question`
diff --git a/tests/testcases_expected_mac.txt b/tests/testcases_expected_mac.txt
index 3ce92f3..a4c0379 100644
--- a/tests/testcases_expected_mac.txt
+++ b/tests/testcases_expected_mac.txt
@@ -43,3 +43,4 @@
 `cliclick t:2 t:5`
 `cliclick t:4 t:0 t:2 t:8 t:1 t:5`
 `cliclick t:w t:i t:n t:d t:o t:w kp:space t:s t:e t:n t:t t:e t:n t:c t:e kp:space t:p t:h t:r t:a t:s t:e`
+`cliclick t:H t:e t:l t:l t:o kp:space t:t t:h t:e t:r t:e t:',' kp:space t:h t:o t:w kp:space t:a t:r t:e kp:space t:y t:o t:u t:'?'`
diff --git a/tests/testcases_expected_windows_belgiankeymap.txt b/tests/testcases_expected_windows_belgiankeymap.txt
index 2c0411e..7e0e80d 100644
--- a/tests/testcases_expected_windows_belgiankeymap.txt
+++ b/tests/testcases_expected_windows_belgiankeymap.txt
@@ -43,3 +43,4 @@
 `C:\Tools\nircmd-x64\nircmd.exe sendkeypress 2 5`
 `C:\Tools\nircmd-x64\nircmd.exe sendkeypress 4 0 2 8 1 5`
 `C:\Tools\nircmd-x64\nircmd.exe sendkeypress w i n d o w spc s e n t e n c e spc p h r a s e`
+`C:\Tools\nircmd-x64\nircmd.exe sendkeypress H e l l o spc t h e r e 0xbc spc h o w spc a r e spc y o u shift+0xbc`
diff --git a/tests/testcases_expected_windows_englishuskeymap.txt b/tests/testcases_expected_windows_englishuskeymap.txt
index a56c660..e61b93e 100644
--- a/tests/testcases_expected_windows_englishuskeymap.txt
+++ b/tests/testcases_expected_windows_englishuskeymap.txt
@@ -43,4 +43,4 @@
 `C:\Tools\nircmd-x64\nircmd.exe sendkeypress 2 5`
 `C:\Tools\nircmd-x64\nircmd.exe sendkeypress 4 0 2 8 1 5`
 `C:\Tools\nircmd-x64\nircmd.exe sendkeypress w i n d o w spc s e n t e n c e spc p h r a s e`
-
+`C:\Tools\nircmd-x64\nircmd.exe sendkeypress H e l l o spc t h e r e 0xbc spc h o w spc a r e spc y o u shift+0xbf`
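What PATCH 2/3 changes can be illustrated with a short sketch (made-up sample data, not code from the patch): the installer now expands the rule template over the set difference between the collected terminals and the per-rule exclusions, so words that must keep their command meaning never become matchable as dictated text:

    terminals = ['five', 'sentence', 'control', 'comma', 'window']
    exclusions = ['control', 'comma']

    for kw in sorted(set(terminals) - set(exclusions)):
        print("raw_word ::= " + kw)
    # raw_word ::= five
    # raw_word ::= sentence
    # raw_word ::= window

The mutable default argument exclusions=[] in the decorator is safe in this case because the list is only ever read, never modified.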
From b8f3b52cc6a580db40b769151e5019b81db262ba Mon Sep 17 00:00:00 2001
From: Joris Van Looveren
Date: Thu, 18 Oct 2018 21:47:19 +0200
Subject: [PATCH 3/3] Move terminal collection to parser; rework parser init
 (no second init needed); verify lm.py

---
 grammar/lm.py    | 14 +++-------
 grammar/main.py  | 10 ++------
 grammar/parse.py | 64 ++++++++++++++++++++++++++++--------------------
 grammar/scan.py  |  7 ++----
 4 files changed, 44 insertions(+), 51 deletions(-)

diff --git a/grammar/lm.py b/grammar/lm.py
index 8cb7f9a..9344494 100644
--- a/grammar/lm.py
+++ b/grammar/lm.py
@@ -109,24 +109,16 @@ def make_lm(rules, visited, which, prefix):
             print prefix, t
             new_prefix.append(t)
 
-def get_terminals(parser):
-    visited = {}
-    terminals = []
-    find_terminals(parser.rules, visited, 'START', terminals)
-    keywords = set(terminals)
-    return sorted(keywords)
-
 if __name__ == '__main__':
     import sys
 
     parser = SingleInputParser()
     #for rule in parser.rules:
     #    print rule, parser.rules[rule]
 
-    visited = {}
+    #visited = {}
     #make_lm(parser.rules, visited, 'START', [])
-    terminals = []
-    find_terminals(parser.rules, visited, 'START', terminals)
-    #print terminals
+
+    terminals = parser.terminals
 
     visited = {}
     find_sequences(parser.rules, visited, 'START')
diff --git a/grammar/main.py b/grammar/main.py
index 2ed3f08..3ebc4e8 100644
--- a/grammar/main.py
+++ b/grammar/main.py
@@ -1,6 +1,6 @@
 # Main file. Parse new commands from stdin until EOF.
 
-from scan import find_keywords
+from scan import install_keywords
 from scan import scan
 from parse import parse
 from parse import GrammaticalError
@@ -16,14 +16,8 @@
 else:
     f = sys.stdin
 
-# The parser is instantiated twice: once to allow
-# the collection of terminals from the instantiated
-# parser (in find_keywords), and then again, augmented
-# with additional rules, created automatically from the
-# set of terminals.
 parser = SingleInputParser()
-find_keywords(parser) # init lexer
-parser = SingleInputParser()
+install_keywords(parser) # init lexer
 
 while True:
     line = f.readline()
diff --git a/grammar/parse.py b/grammar/parse.py
index 4e574c5..c2d1727 100644
--- a/grammar/parse.py
+++ b/grammar/parse.py
@@ -3,7 +3,6 @@
 from spark import GenericParser
 from spark import GenericASTBuilder
 from ast import AST
-import scan
 
 class GrammaticalError(Exception):
     def __init__(self, string):
@@ -13,12 +12,31 @@ def __str__(self):
 
 class CoreParser(GenericParser):
+    terminals = []
+
     def __init__(self, start):
-        # check if we have to add terminal rules, and
-        # do so if the list of terminals is known
-        self.install_terminal_rules()
         # initialize and set up the grammar rules
         GenericParser.__init__(self, start)
+        # after the "base" initialization, collect all terminals
+        visited = {}
+        self.find_terminals(self.rules, visited, 'START', self.terminals)
+        self.terminals = list(set(self.terminals)) # remove duplicates
+        # add terminal rules if needed
+        self.install_terminal_rules()
+        # re-initialize the parser rules
+        GenericParser.__init__(self, start)
+
+    # collect all terminals from the grammar rules
+    def find_terminals(self, rules, visited, which, found):
+        if which in visited: return
+        visited[which] = 1
+        for r in rules[which]:
+            (name, tokens) = r
+            for t in tokens:
+                if t in rules:
+                    self.find_terminals(rules, visited, t, found)
+                elif t != 'END' and t != 'ANY' and t != '|-':
+                    found.append(t)
 
     # In our grammar, the token type ANY does not match any of the other
     # token types. In some cases, this is not the desired behavior, e.g. for
     # "word <ANY>" you want <ANY> to be able to be "five" or "sentence" or
     # any other word that may have been used as a terminal in the grammar.
     # This becomes more of an issue as you add macros, and more words become
     # reserved.
     # We can work around this limitation by adding rules for terminals
     # that we want to allow; however, with many terminals this will
     # quickly become infeasible.
     # The function and function decorator below work together to automate this.
     # (The decorator is needed to modify the docstring programmatically.)
-    # We rely on the fact that in main.py, we already collect a list of
-    # terminals (using find_terminals()). This does mean, however, that we
-    # have to instantiate the parser twice: first in "basic" form, which is
-    # used to collect the terminals, and then again in "decorated" form, where
-    # we automatically add the desired terminal rules.
-
+
     def install_terminal_rules(self):
         # if we have a list of terminals available: walk all rules, and see
         # if they were annotated with @add_rules_for_terminals. If so, we add
         # new rules based on the template for that rule and the terminals.
-        try:
-            if scan.keywords is not None:
-                for item in CoreParser.__dict__:
-                    if item.startswith("p_"):
-                        function = CoreParser.__dict__[item]
-                        try:
-                            # this will trigger an AttributeError
-                            # for functions that were not annotated:
-                            template = function._rule_template
-                            exclusions = function._exclusions
-                            for kw in set(scan.keywords) - set(exclusions):
-                                function.__doc__ += \
-                                    (template.format(kw) + "\n")
-                        except AttributeError:
-                            pass
-        except AttributeError:
-            pass
+        if len(self.terminals) > 0:
+            for item in CoreParser.__dict__:
+                if item.startswith("p_"):
+                    function = CoreParser.__dict__[item]
+                    try:
+                        # this will trigger an AttributeError
+                        # for functions that were not annotated:
+                        template = function._rule_template
+                        exclusions = function._exclusions
+                        for kw in set(self.terminals) - set(exclusions):
+                            function.__doc__ += \
+                                (template.format(kw) + "\n")
+                    except AttributeError:
+                        pass
 
     # function decorator: adding @add_rules_for_terminals("")
     # before a function declaration will add the given rule template
diff --git a/grammar/scan.py b/grammar/scan.py
index d4c4d5f..ddcb408 100644
--- a/grammar/scan.py
+++ b/grammar/scan.py
@@ -1,11 +1,8 @@
 # Lexer that produces a sequence of tokens (keywords + ANY).
 
-import re
-from lm import get_terminals
-
-def find_keywords(parser):
+def install_keywords(parser):
     global keywords
-    keywords = get_terminals(parser)
+    keywords = parser.terminals
 
 class Token:
     def __init__(self, type, wordno=-1, extra=''):
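To see how PATCH 3/3 collects terminals without a second parser instantiation, here is a toy model of the traversal (the miniature rule table is invented for this note; the walk itself mirrors CoreParser.find_terminals above, and SPARK's parser.rules maps each nonterminal to a list of (name, tokens) productions, as the patch code assumes):

    rules = {
        'START':   [('START', ('|-', 'command', 'END'))],
        'command': [('command', ('english',)), ('command', ('number',))],
        'english': [('english', ('word', 'ANY'))],
        'number':  [('number', ('five',))],
        'word':    [('word', ('sentence',)), ('word', ('phrase',))],
    }

    def find_terminals(rules, visited, which, found):
        if which in visited: return
        visited[which] = 1
        for (name, tokens) in rules[which]:
            for t in tokens:
                if t in rules:
                    find_terminals(rules, visited, t, found)
                elif t != 'END' and t != 'ANY' and t != '|-':
                    found.append(t)

    found = []
    find_terminals(rules, {}, 'START', found)
    print(sorted(set(found)))   # ['five', 'phrase', 'sentence']

Anything that appears on a right-hand side without being a nonterminal itself (and is not one of the pseudo-tokens END, ANY, or the |- start marker) is a terminal. Collecting these inside the parser is what lets the patch drop the second SingleInputParser construction in main.py and the get_terminals() helper in lm.py.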