diff --git a/grammar/lm.py b/grammar/lm.py index 8cb7f9a..9344494 100644 --- a/grammar/lm.py +++ b/grammar/lm.py @@ -109,24 +109,16 @@ def make_lm(rules, visited, which, prefix): print prefix, t new_prefix.append(t) -def get_terminals(parser): - visited = {} - terminals = [] - find_terminals(parser.rules, visited, 'START', terminals) - keywords = set(terminals) - return sorted(keywords) - if __name__ == '__main__': import sys parser = SingleInputParser() #for rule in parser.rules: # print rule, parser.rules[rule] - visited = {} + #visited = {} #make_lm(parser.rules, visited, 'START', []) - terminals = [] - find_terminals(parser.rules, visited, 'START', terminals) - #print terminals + + terminals = parser.terminals visited = {} find_sequences(parser.rules, visited, 'START') diff --git a/grammar/main.py b/grammar/main.py index b4b6b13..3ebc4e8 100644 --- a/grammar/main.py +++ b/grammar/main.py @@ -1,6 +1,6 @@ # Main file. Parse new commands from stdin until EOF. -from scan import find_keywords +from scan import install_keywords from scan import scan from parse import parse from parse import GrammaticalError @@ -17,7 +17,7 @@ f = sys.stdin parser = SingleInputParser() - find_keywords(parser) # init lexer + install_keywords(parser) # init lexer while True: line = f.readline() diff --git a/grammar/parse.py b/grammar/parse.py index 5e2049f..c2d1727 100644 --- a/grammar/parse.py +++ b/grammar/parse.py @@ -11,8 +11,76 @@ def __str__(self): return self.string class CoreParser(GenericParser): + + terminals = [] + def __init__(self, start): + # initialize and set up the grammar rules GenericParser.__init__(self, start) + # after the "base" initialization, collect all terminals into a local list (the class-level default list is left untouched) + found = [] + self.find_terminals(self.rules, {}, 'START', found) + self.terminals = sorted(set(found)) # remove duplicates; sorted, as the old get_terminals() returned them + # add terminal rules if needed + self.install_terminal_rules() + # re-initialize the parser rules + GenericParser.__init__(self, start) + + # collect all 
terminals from the grammar rules + def find_terminals(self, rules, visited, which, found): + if which in visited: return + visited[which] = 1 + for r in rules[which]: + (name, tokens) = r + for t in tokens: + if t in rules: + self.find_terminals(rules, visited, t, found) + elif t != 'END' and t != 'ANY' and t != '|-': + found.append(t) + + # In our grammar, the token type ANY does not match any of the other + # token types. In some cases, this is not the desired behavior, e.g. for + # "word X" you want X to be able to be "five" or "sentence" or + # any other word that may have been used as a terminal in the grammar. + # This becomes more of an issue as you add macros, and more words become + # reserved. + # We can work around this limitation by adding rules for terminals + # that we want to allow; however, with many terminals this will + # quickly become infeasible. + # The function and function decorator below work together to automate this. + # (The decorator is needed to modify the docstring programmatically.) + + def install_terminal_rules(self): + # if we have a list of terminals available: walk all rules, and see + # if they were annotated with @add_rules_for_terminals. If so, we add + # new rules based on the template for that rule and the terminals. + if len(self.terminals) > 0: + for item in CoreParser.__dict__: + if item.startswith("p_"): + function = CoreParser.__dict__[item] + try: + # this will trigger an AttributeError + # for functions that were not annotated: + template = function._rule_template + exclusions = function._exclusions + for kw in set(self.terminals) - set(exclusions): + function.__doc__ += \ + (template.format(kw) + "\n") + except AttributeError: + pass + + # function decorator: adding @add_rules_for_terminals(rule_template) + # before a function declaration will add the given rule template + # as a new attribute to the function. 
+ # This is used to signal that for this function, we have to add a new rule + # for each terminal, so that the terminal can be used in the spoken text. + def add_rules_for_terminals(rule_template, exclusions=()): + def add_attrs(func): + func._rule_template = rule_template + func._exclusions = exclusions + return func + return add_attrs + def typestring(self, token): return token.type @@ -347,10 +415,13 @@ def p_modifiers(self, args): else: return AST('mod_plus_key', [ value[args[0].type] ], [ args[1] ] ) + @add_rules_for_terminals("english ::= word {}") def p_english(self, args): ''' english ::= word ANY ''' + if args[1].type != 'ANY': + return AST('sequence', [ args[1].type ]) return AST('sequence', [ args[1].extra ]) def p_word_sentence(self, args): @@ -379,21 +450,18 @@ def p_word_repeat(self, args): args[1].children.insert(0, AST('null', args[0])) return args[1] + # 'exclusions' contains the terminals that should continue to be + # treated as commands. As it is, the list is somewhat arbitrary; + # it contains modifier keys and a subset of the special characters from + # the "p_character" rule. Modify as desired. + @add_rules_for_terminals("raw_word ::= {}", exclusions = \ + ['control', 'alt', 'alternative', + 'colon', 'semicolon', 'bang', 'hash', 'percent', + 'ampersand', 'star', 'minus', 'underscore', 'plus', + 'backslash', 'question', 'comma']) def p_raw_word(self, args): ''' raw_word ::= ANY - raw_word ::= zero - raw_word ::= one - raw_word ::= two - raw_word ::= three - raw_word ::= four - raw_word ::= five - raw_word ::= six - raw_word ::= seven - raw_word ::= eight - raw_word ::= nine - raw_word ::= to - raw_word ::= for ''' if(args[0].type == 'ANY'): return args[0].extra diff --git a/grammar/scan.py b/grammar/scan.py index d4c4d5f..ddcb408 100644 --- a/grammar/scan.py +++ b/grammar/scan.py @@ -1,11 +1,8 @@ # Lexer that produces a sequence of tokens (keywords + ANY). 
-import re -from lm import get_terminals - -def find_keywords(parser): +def install_keywords(parser): global keywords - keywords = get_terminals(parser) + keywords = parser.terminals class Token: def __init__(self, type, wordno=-1, extra=''): diff --git a/tests/testcases.txt b/tests/testcases.txt index 89853f6..3f9e767 100644 --- a/tests/testcases.txt +++ b/tests/testcases.txt @@ -42,3 +42,5 @@ control space control left number twenty five number four hundred two thousand eight hundred fifteen +phrase window sentence phrase +sentence hello there comma space phrase how are you question diff --git a/tests/testcases_expected_linux.txt b/tests/testcases_expected_linux.txt index 4584b73..24f1b0b 100644 --- a/tests/testcases_expected_linux.txt +++ b/tests/testcases_expected_linux.txt @@ -42,3 +42,5 @@ `/usr/bin/xdotool key ctrl+Left` `/usr/bin/xdotool key 2 key 5` `/usr/bin/xdotool key 4 key 0 key 2 key 8 key 1 key 5` +`/usr/bin/xdotool key w key i key n key d key o key w key space key s key e key n key t key e key n key c key e key space key p key h key r key a key s key e` +`/usr/bin/xdotool key H key e key l key l key o key space key t key h key e key r key e key comma key space key h key o key w key space key a key r key e key space key y key o key u key question` diff --git a/tests/testcases_expected_mac.txt b/tests/testcases_expected_mac.txt index c423248..a4c0379 100644 --- a/tests/testcases_expected_mac.txt +++ b/tests/testcases_expected_mac.txt @@ -42,3 +42,5 @@ `cliclick w:10 kd:ctrl kp:arrow-left ku:ctrl` `cliclick t:2 t:5` `cliclick t:4 t:0 t:2 t:8 t:1 t:5` +`cliclick t:w t:i t:n t:d t:o t:w kp:space t:s t:e t:n t:t t:e t:n t:c t:e kp:space t:p t:h t:r t:a t:s t:e` +`cliclick t:H t:e t:l t:l t:o kp:space t:t t:h t:e t:r t:e t:',' kp:space t:h t:o t:w kp:space t:a t:r t:e kp:space t:y t:o t:u t:'?'` diff --git a/tests/testcases_expected_windows_belgiankeymap.txt b/tests/testcases_expected_windows_belgiankeymap.txt index 4f57000..7e0e80d 100644 --- 
a/tests/testcases_expected_windows_belgiankeymap.txt +++ b/tests/testcases_expected_windows_belgiankeymap.txt @@ -42,3 +42,5 @@ `C:\Tools\nircmd-x64\nircmd.exe sendkeypress ctrl+left` `C:\Tools\nircmd-x64\nircmd.exe sendkeypress 2 5` `C:\Tools\nircmd-x64\nircmd.exe sendkeypress 4 0 2 8 1 5` +`C:\Tools\nircmd-x64\nircmd.exe sendkeypress w i n d o w spc s e n t e n c e spc p h r a s e` +`C:\Tools\nircmd-x64\nircmd.exe sendkeypress H e l l o spc t h e r e 0xbc spc h o w spc a r e spc y o u shift+0xbc` diff --git a/tests/testcases_expected_windows_englishuskeymap.txt b/tests/testcases_expected_windows_englishuskeymap.txt index 59a163e..e61b93e 100644 --- a/tests/testcases_expected_windows_englishuskeymap.txt +++ b/tests/testcases_expected_windows_englishuskeymap.txt @@ -42,3 +42,5 @@ `C:\Tools\nircmd-x64\nircmd.exe sendkeypress ctrl+left` `C:\Tools\nircmd-x64\nircmd.exe sendkeypress 2 5` `C:\Tools\nircmd-x64\nircmd.exe sendkeypress 4 0 2 8 1 5` +`C:\Tools\nircmd-x64\nircmd.exe sendkeypress w i n d o w spc s e n t e n c e spc p h r a s e` +`C:\Tools\nircmd-x64\nircmd.exe sendkeypress H e l l o spc t h e r e 0xbc spc h o w spc a r e spc y o u shift+0xbf`