From a41abe8b2520ce73cd799589b6121629614e13d8 Mon Sep 17 00:00:00 2001
From: Joris Van Looveren
Date: Mon, 15 Oct 2018 11:17:59 +0200
Subject: [PATCH 1/3] Automatically add rules for all terminals to specific,
 annotated rules.

---
 grammar/main.py                               |  6 ++
 grammar/parse.py                              | 72 +++++++++++++++----
 tests/testcases.txt                           |  1 +
 tests/testcases_expected_linux.txt            |  1 +
 tests/testcases_expected_mac.txt              |  1 +
 ...stcases_expected_windows_belgiankeymap.txt |  1 +
 ...cases_expected_windows_englishuskeymap.txt |  2 +
 7 files changed, 72 insertions(+), 12 deletions(-)

diff --git a/grammar/main.py b/grammar/main.py
index b4b6b13..2ed3f08 100644
--- a/grammar/main.py
+++ b/grammar/main.py
@@ -16,8 +16,14 @@
 else:
     f = sys.stdin
 
+# The parser is instantiated twice: once to allow
+# the collection of terminals from the instantiated
+# parser (in find_keywords), and then again, augmented
+# with additional rules, created automatically from the
+# set of terminals.
 parser = SingleInputParser()
 find_keywords(parser) # init lexer
+parser = SingleInputParser()
 
 while True:
     line = f.readline()
diff --git a/grammar/parse.py b/grammar/parse.py
index 5e2049f..58f82d2 100644
--- a/grammar/parse.py
+++ b/grammar/parse.py
@@ -3,6 +3,7 @@
 from spark import GenericParser
 from spark import GenericASTBuilder
 from ast import AST
+import scan
 
 class GrammaticalError(Exception):
     def __init__(self, string):
@@ -11,9 +12,64 @@ def __str__(self):
         return self.string
 
 class CoreParser(GenericParser):
     def __init__(self, start):
+        # check if we have to add terminal rules, and
+        # do so if the list of terminals is known
+        self.install_terminal_rules()
+        # initialize and set up the grammar rules
         GenericParser.__init__(self, start)
 
+    # In our grammar, the token type ANY does not match any of the other
+    # token types. In some cases, this is not the desired behavior, e.g. for
+    # "word <ANY>" you want <ANY> to be able to be "five" or "sentence" or
+    # any other word that may have been used as a terminal in the grammar.
+    # This becomes more of an issue as you add macros, and more words become
+    # reserved.
+    # We can work around this limitation by adding rules for terminals
+    # that we want to allow; however, with many terminals this will
+    # quickly become infeasible.
+    # The function and function decorator below work together to automate this.
+    # (The decorator is needed to modify the docstring programmatically.)
+    # We rely on the fact that in main.py, we already collect a list of
+    # terminals (using find_terminals()). This does mean, however, that we
+    # have to instantiate the parser twice: first in "basic" form, which is
+    # used to collect the terminals, and then again in "decorated" form, where
+    # we automatically add the desired terminal rules.
+
+    def install_terminal_rules(self):
+        # if we have a list of terminals available: walk all rules, and see
+        # if they were annotated with @add_rules_for_terminals. If so, we add
+        # new rules based on the template for that rule and the terminals.
+        try:
+            if scan.keywords is not None:
+                for item in CoreParser.__dict__:
+                    if item.startswith("p_"):
+                        function = CoreParser.__dict__[item]
+                        try:
+                            # this will trigger an AttributeError
+                            # for functions that were not annotated:
+                            template = function._rule_template
+                            for kw in scan.keywords:
+                                function.__doc__ += \
+                                    (template.format(kw) + "\n")
+                        except AttributeError:
+                            pass
+        except AttributeError:
+            pass
+
+    # function decorator: adding @add_rules_for_terminals("")
+    # before a function declaration will add the given rule template
+    # as a new attribute to the function.
+    # This is used to signal that for this function, we have to add a new rule
+    # for each terminal, so that the terminal can be used in the spoken text.
+    def add_rules_for_terminals(rule_template):
+        def add_attrs(func):
+            func._rule_template = rule_template
+            return func
+        return add_attrs
+
     def typestring(self, token):
         return token.type
@@ -347,10 +403,13 @@ def p_modifiers(self, args):
         else:
             return AST('mod_plus_key', [ value[args[0].type] ], [ args[1] ] )
 
+    @add_rules_for_terminals("english ::= word {}")
     def p_english(self, args):
         '''
             english ::= word ANY
         '''
+        if args[1].type != 'ANY':
+            return AST('sequence', [ args[1].type ])
         return AST('sequence', [ args[1].extra ])
 
     def p_word_sentence(self, args):
@@ -379,21 +438,10 @@ def p_word_repeat(self, args):
         args[1].children.insert(0, AST('null', args[0]))
         return args[1]
 
+    @add_rules_for_terminals("raw_word ::= {}")
     def p_raw_word(self, args):
         '''
             raw_word ::= ANY
-            raw_word ::= zero
-            raw_word ::= one
-            raw_word ::= two
-            raw_word ::= three
-            raw_word ::= four
-            raw_word ::= five
-            raw_word ::= six
-            raw_word ::= seven
-            raw_word ::= eight
-            raw_word ::= nine
-            raw_word ::= to
-            raw_word ::= for
         '''
         if(args[0].type == 'ANY'):
             return args[0].extra
diff --git a/tests/testcases.txt b/tests/testcases.txt
index 89853f6..360df9f 100644
--- a/tests/testcases.txt
+++ b/tests/testcases.txt
@@ -42,3 +42,4 @@ control space
 control left
 number twenty five
 number four hundred two thousand eight hundred fifteen
+phrase window sentence phrase
diff --git a/tests/testcases_expected_linux.txt b/tests/testcases_expected_linux.txt
index 4584b73..c9cfaee 100644
--- a/tests/testcases_expected_linux.txt
+++ b/tests/testcases_expected_linux.txt
@@ -42,3 +42,4 @@
 `/usr/bin/xdotool key ctrl+Left`
 `/usr/bin/xdotool key 2 key 5`
 `/usr/bin/xdotool key 4 key 0 key 2 key 8 key 1 key 5`
+`/usr/bin/xdotool key w key i key n key d key o key w key space key s key e key n key t key e key n key c key e key space key p key h key r key a key s key e`
diff --git a/tests/testcases_expected_mac.txt b/tests/testcases_expected_mac.txt
index c423248..3ce92f3 100644
--- a/tests/testcases_expected_mac.txt
+++ b/tests/testcases_expected_mac.txt
@@ -42,3 +42,4 @@
 `cliclick w:10 kd:ctrl kp:arrow-left ku:ctrl`
 `cliclick t:2 t:5`
 `cliclick t:4 t:0 t:2 t:8 t:1 t:5`
+`cliclick t:w t:i t:n t:d t:o t:w kp:space t:s t:e t:n t:t t:e t:n t:c t:e kp:space t:p t:h t:r t:a t:s t:e`
diff --git a/tests/testcases_expected_windows_belgiankeymap.txt b/tests/testcases_expected_windows_belgiankeymap.txt
index 4f57000..2c0411e 100644
--- a/tests/testcases_expected_windows_belgiankeymap.txt
+++ b/tests/testcases_expected_windows_belgiankeymap.txt
@@ -42,3 +42,4 @@
 `C:\Tools\nircmd-x64\nircmd.exe sendkeypress ctrl+left`
 `C:\Tools\nircmd-x64\nircmd.exe sendkeypress 2 5`
 `C:\Tools\nircmd-x64\nircmd.exe sendkeypress 4 0 2 8 1 5`
+`C:\Tools\nircmd-x64\nircmd.exe sendkeypress w i n d o w spc s e n t e n c e spc p h r a s e`
diff --git a/tests/testcases_expected_windows_englishuskeymap.txt b/tests/testcases_expected_windows_englishuskeymap.txt
index 59a163e..a56c660 100644
--- a/tests/testcases_expected_windows_englishuskeymap.txt
+++ b/tests/testcases_expected_windows_englishuskeymap.txt
@@ -42,3 +42,5 @@
 `C:\Tools\nircmd-x64\nircmd.exe sendkeypress ctrl+left`
 `C:\Tools\nircmd-x64\nircmd.exe sendkeypress 2 5`
 `C:\Tools\nircmd-x64\nircmd.exe sendkeypress 4 0 2 8 1 5`
+`C:\Tools\nircmd-x64\nircmd.exe sendkeypress w i n d o w spc s e n t e n c e spc p h r a s e`
+
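A note on the technique in PATCH 1/3: SPARK-style parsers such as GenericParser read their grammar from the docstrings of the p_* rule methods, so new productions can be injected by appending lines to __doc__ before the parser is initialized; function docstrings are writable, which is what makes this work. Below is a minimal, self-contained sketch of the decorator-plus-installer idea; the ToyParser class and the two sample terminals are invented for this note and are not code from the patch:

    def add_rules_for_terminals(rule_template):
        # mark a rule method with a template such as "raw_word ::= {}"
        def add_attrs(func):
            func._rule_template = rule_template
            return func
        return add_attrs

    class ToyParser(object):
        @add_rules_for_terminals("raw_word ::= {}")
        def p_raw_word(self, args):
            '''
                raw_word ::= ANY
            '''

    def install_terminal_rules(cls, terminals):
        # append one production per terminal to every annotated rule method
        for name in list(cls.__dict__):
            func = cls.__dict__[name]
            template = getattr(func, '_rule_template', None)
            if template is not None:
                for kw in terminals:
                    func.__doc__ += template.format(kw) + "\n"

    install_terminal_rules(ToyParser, ['five', 'sentence'])
    print(ToyParser.p_raw_word.__doc__)
    # the docstring now also contains:
    #     raw_word ::= five
    #     raw_word ::= sentence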
From 67d0b8461f8d8704d315f7bbb055da5d6f1a083b Mon Sep 17 00:00:00 2001
From: Joris Van Looveren
Date: Wed, 17 Oct 2018 22:28:58 +0200
Subject: [PATCH 2/3] Add 'exclusions' list to add_rules_for_terminals
 function decorator

---
 grammar/parse.py                                 | 16 +++++++++++++---
 tests/testcases.txt                              |  1 +
 tests/testcases_expected_linux.txt               |  1 +
 tests/testcases_expected_mac.txt                 |  1 +
 .../testcases_expected_windows_belgiankeymap.txt |  1 +
 ...estcases_expected_windows_englishuskeymap.txt |  2 +-
 6 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/grammar/parse.py b/grammar/parse.py
index 58f82d2..4e574c5 100644
--- a/grammar/parse.py
+++ b/grammar/parse.py
@@ -50,7 +50,8 @@ def install_terminal_rules(self):
                             # this will trigger an AttributeError
                             # for functions that were not annotated:
                             template = function._rule_template
-                            for kw in scan.keywords:
+                            exclusions = function._exclusions
+                            for kw in set(scan.keywords) - set(exclusions):
                                 function.__doc__ += \
                                     (template.format(kw) + "\n")
                         except AttributeError:
@@ -63,9 +64,10 @@
     # as a new attribute to the function.
     # This is used to signal that for this function, we have to add a new rule
     # for each terminal, so that the terminal can be used in the spoken text.
-    def add_rules_for_terminals(rule_template):
+    def add_rules_for_terminals(rule_template, exclusions=[]):
         def add_attrs(func):
             func._rule_template = rule_template
+            func._exclusions = exclusions
             return func
         return add_attrs
 
@@ -438,7 +440,15 @@ def p_word_repeat(self, args):
         args[1].children.insert(0, AST('null', args[0]))
         return args[1]
 
-    @add_rules_for_terminals("raw_word ::= {}")
+    # 'exclusions' contains the terminals that should continue to be
+    # treated as commands. As it is, the list is somewhat arbitrary;
+    # it contains modifier keys and a subset of the special characters from
+    # the "p_character" rule. Modify as desired.
+    @add_rules_for_terminals("raw_word ::= {}", exclusions = \
+        ['control', 'alt', 'alternative',
+         'colon', 'semicolon', 'bang', 'hash', 'percent',
+         'ampersand', 'star', 'minus', 'underscore', 'plus',
+         'backslash', 'question', 'comma'])
     def p_raw_word(self, args):
         '''
             raw_word ::= ANY
diff --git a/tests/testcases.txt b/tests/testcases.txt
index 360df9f..3f9e767 100644
--- a/tests/testcases.txt
+++ b/tests/testcases.txt
@@ -43,3 +43,4 @@ control left
 number twenty five
 number four hundred two thousand eight hundred fifteen
 phrase window sentence phrase
+sentence hello there comma space phrase how are you question
diff --git a/tests/testcases_expected_linux.txt b/tests/testcases_expected_linux.txt
index c9cfaee..24f1b0b 100644
--- a/tests/testcases_expected_linux.txt
+++ b/tests/testcases_expected_linux.txt
@@ -43,3 +43,4 @@
 `/usr/bin/xdotool key 2 key 5`
 `/usr/bin/xdotool key 4 key 0 key 2 key 8 key 1 key 5`
 `/usr/bin/xdotool key w key i key n key d key o key w key space key s key e key n key t key e key n key c key e key space key p key h key r key a key s key e`
+`/usr/bin/xdotool key H key e key l key l key o key space key t key h key e key r key e key comma key space key h key o key w key space key a key r key e key space key y key o key u key question`
diff --git a/tests/testcases_expected_mac.txt b/tests/testcases_expected_mac.txt
index 3ce92f3..a4c0379 100644
--- a/tests/testcases_expected_mac.txt
+++ b/tests/testcases_expected_mac.txt
@@ -43,3 +43,4 @@
 `cliclick t:2 t:5`
 `cliclick t:4 t:0 t:2 t:8 t:1 t:5`
 `cliclick t:w t:i t:n t:d t:o t:w kp:space t:s t:e t:n t:t t:e t:n t:c t:e kp:space t:p t:h t:r t:a t:s t:e`
+`cliclick t:H t:e t:l t:l t:o kp:space t:t t:h t:e t:r t:e t:',' kp:space t:h t:o t:w kp:space t:a t:r t:e kp:space t:y t:o t:u t:'?'`
diff --git a/tests/testcases_expected_windows_belgiankeymap.txt b/tests/testcases_expected_windows_belgiankeymap.txt
index 2c0411e..7e0e80d 100644
--- a/tests/testcases_expected_windows_belgiankeymap.txt
+++ b/tests/testcases_expected_windows_belgiankeymap.txt
@@ -43,3 +43,4 @@
 `C:\Tools\nircmd-x64\nircmd.exe sendkeypress 2 5`
 `C:\Tools\nircmd-x64\nircmd.exe sendkeypress 4 0 2 8 1 5`
 `C:\Tools\nircmd-x64\nircmd.exe sendkeypress w i n d o w spc s e n t e n c e spc p h r a s e`
+`C:\Tools\nircmd-x64\nircmd.exe sendkeypress H e l l o spc t h e r e 0xbc spc h o w spc a r e spc y o u shift+0xbc`
diff --git a/tests/testcases_expected_windows_englishuskeymap.txt b/tests/testcases_expected_windows_englishuskeymap.txt
index a56c660..e61b93e 100644
--- a/tests/testcases_expected_windows_englishuskeymap.txt
+++ b/tests/testcases_expected_windows_englishuskeymap.txt
@@ -43,4 +43,4 @@
 `C:\Tools\nircmd-x64\nircmd.exe sendkeypress 2 5`
 `C:\Tools\nircmd-x64\nircmd.exe sendkeypress 4 0 2 8 1 5`
 `C:\Tools\nircmd-x64\nircmd.exe sendkeypress w i n d o w spc s e n t e n c e spc p h r a s e`
-
+`C:\Tools\nircmd-x64\nircmd.exe sendkeypress H e l l o spc t h e r e 0xbc spc h o w spc a r e spc y o u shift+0xbf`
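What PATCH 2/3 changes can be illustrated with a short sketch (made-up sample data, not code from the patch): the installer now expands the rule template over the set difference between the collected terminals and the per-rule exclusions, so words that must keep their command meaning never become matchable as dictated text:

    terminals = ['five', 'sentence', 'control', 'comma', 'window']
    exclusions = ['control', 'comma']

    for kw in sorted(set(terminals) - set(exclusions)):
        print("raw_word ::= " + kw)
    # raw_word ::= five
    # raw_word ::= sentence
    # raw_word ::= window

The mutable default argument exclusions=[] in the decorator is safe in this case because the list is only ever read, never modified.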
From b8f3b52cc6a580db40b769151e5019b81db262ba Mon Sep 17 00:00:00 2001
From: Joris Van Looveren
Date: Thu, 18 Oct 2018 21:47:19 +0200
Subject: [PATCH 3/3] Move terminal collection to parser; rework parser init
 (no second init needed); verify lm.py

---
 grammar/lm.py    | 14 +++-------
 grammar/main.py  | 10 ++------
 grammar/parse.py | 64 ++++++++++++++++++++++++++++--------------------
 grammar/scan.py  |  7 ++----
 4 files changed, 44 insertions(+), 51 deletions(-)

diff --git a/grammar/lm.py b/grammar/lm.py
index 8cb7f9a..9344494 100644
--- a/grammar/lm.py
+++ b/grammar/lm.py
@@ -109,24 +109,16 @@ def make_lm(rules, visited, which, prefix):
             print prefix, t
             new_prefix.append(t)
 
-def get_terminals(parser):
-    visited = {}
-    terminals = []
-    find_terminals(parser.rules, visited, 'START', terminals)
-    keywords = set(terminals)
-    return sorted(keywords)
-
 if __name__ == '__main__':
     import sys
 
     parser = SingleInputParser()
     #for rule in parser.rules:
     #    print rule, parser.rules[rule]
 
-    visited = {}
+    #visited = {}
     #make_lm(parser.rules, visited, 'START', [])
-    terminals = []
-    find_terminals(parser.rules, visited, 'START', terminals)
-    #print terminals
+
+    terminals = parser.terminals
 
     visited = {}
     find_sequences(parser.rules, visited, 'START')
diff --git a/grammar/main.py b/grammar/main.py
index 2ed3f08..3ebc4e8 100644
--- a/grammar/main.py
+++ b/grammar/main.py
@@ -1,6 +1,6 @@
 # Main file. Parse new commands from stdin until EOF.
 
-from scan import find_keywords
+from scan import install_keywords
 from scan import scan
 from parse import parse
 from parse import GrammaticalError
@@ -16,14 +16,8 @@
 else:
     f = sys.stdin
 
-# The parser is instantiated twice: once to allow
-# the collection of terminals from the instantiated
-# parser (in find_keywords), and then again, augmented
-# with additional rules, created automatically from the
-# set of terminals.
 parser = SingleInputParser()
-find_keywords(parser) # init lexer
-parser = SingleInputParser()
+install_keywords(parser) # init lexer
 
 while True:
     line = f.readline()
diff --git a/grammar/parse.py b/grammar/parse.py
index 4e574c5..c2d1727 100644
--- a/grammar/parse.py
+++ b/grammar/parse.py
@@ -3,7 +3,6 @@
 from spark import GenericParser
 from spark import GenericASTBuilder
 from ast import AST
-import scan
 
 class GrammaticalError(Exception):
     def __init__(self, string):
@@ -13,12 +12,31 @@ def __str__(self):
 
 class CoreParser(GenericParser):
+    terminals = []
+
     def __init__(self, start):
-        # check if we have to add terminal rules, and
-        # do so if the list of terminals is known
-        self.install_terminal_rules()
         # initialize and set up the grammar rules
         GenericParser.__init__(self, start)
+        # after the "base" initialization, collect all terminals
+        visited = {}
+        self.find_terminals(self.rules, visited, 'START', self.terminals)
+        self.terminals = list(set(self.terminals)) # remove duplicates
+        # add terminal rules if needed
+        self.install_terminal_rules()
+        # re-initialize the parser rules
+        GenericParser.__init__(self, start)
+
+    # collect all terminals from the grammar rules
+    def find_terminals(self, rules, visited, which, found):
+        if which in visited: return
+        visited[which] = 1
+        for r in rules[which]:
+            (name, tokens) = r
+            for t in tokens:
+                if t in rules:
+                    self.find_terminals(rules, visited, t, found)
+                elif t != 'END' and t != 'ANY' and t != '|-':
+                    found.append(t)
 
     # In our grammar, the token type ANY does not match any of the other
     # token types. In some cases, this is not the desired behavior, e.g. for
     # "word <ANY>" you want <ANY> to be able to be "five" or "sentence" or
     # any other word that may have been used as a terminal in the grammar.
     # This becomes more of an issue as you add macros, and more words become
     # reserved.
     # We can work around this limitation by adding rules for terminals
     # that we want to allow; however, with many terminals this will
     # quickly become infeasible.
     # The function and function decorator below work together to automate this.
     # (The decorator is needed to modify the docstring programmatically.)
-    # We rely on the fact that in main.py, we already collect a list of
-    # terminals (using find_terminals()). This does mean, however, that we
-    # have to instantiate the parser twice: first in "basic" form, which is
-    # used to collect the terminals, and then again in "decorated" form, where
-    # we automatically add the desired terminal rules.
-
+
     def install_terminal_rules(self):
         # if we have a list of terminals available: walk all rules, and see
         # if they were annotated with @add_rules_for_terminals. If so, we add
         # new rules based on the template for that rule and the terminals.
-        try:
-            if scan.keywords is not None:
-                for item in CoreParser.__dict__:
-                    if item.startswith("p_"):
-                        function = CoreParser.__dict__[item]
-                        try:
-                            # this will trigger an AttributeError
-                            # for functions that were not annotated:
-                            template = function._rule_template
-                            exclusions = function._exclusions
-                            for kw in set(scan.keywords) - set(exclusions):
-                                function.__doc__ += \
-                                    (template.format(kw) + "\n")
-                        except AttributeError:
-                            pass
-        except AttributeError:
-            pass
+        if len(self.terminals) > 0:
+            for item in CoreParser.__dict__:
+                if item.startswith("p_"):
+                    function = CoreParser.__dict__[item]
+                    try:
+                        # this will trigger an AttributeError
+                        # for functions that were not annotated:
+                        template = function._rule_template
+                        exclusions = function._exclusions
+                        for kw in set(self.terminals) - set(exclusions):
+                            function.__doc__ += \
+                                (template.format(kw) + "\n")
+                    except AttributeError:
+                        pass
 
     # function decorator: adding @add_rules_for_terminals("")
     # before a function declaration will add the given rule template
diff --git a/grammar/scan.py b/grammar/scan.py
index d4c4d5f..ddcb408 100644
--- a/grammar/scan.py
+++ b/grammar/scan.py
@@ -1,11 +1,8 @@
 # Lexer that produces a sequence of tokens (keywords + ANY).
 
-import re
-from lm import get_terminals
-
-def find_keywords(parser):
+def install_keywords(parser):
     global keywords
-    keywords = get_terminals(parser)
+    keywords = parser.terminals
 
 class Token:
     def __init__(self, type, wordno=-1, extra=''):
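To see how PATCH 3/3 collects terminals without a second parser instantiation, here is a toy model of the traversal (the miniature rule table is invented for this note; the walk itself mirrors CoreParser.find_terminals above, and SPARK's parser.rules maps each nonterminal to a list of (name, tokens) productions, as the patch code assumes):

    rules = {
        'START':   [('START', ('|-', 'command', 'END'))],
        'command': [('command', ('english',)), ('command', ('number',))],
        'english': [('english', ('word', 'ANY'))],
        'number':  [('number', ('five',))],
        'word':    [('word', ('sentence',)), ('word', ('phrase',))],
    }

    def find_terminals(rules, visited, which, found):
        if which in visited: return
        visited[which] = 1
        for (name, tokens) in rules[which]:
            for t in tokens:
                if t in rules:
                    find_terminals(rules, visited, t, found)
                elif t != 'END' and t != 'ANY' and t != '|-':
                    found.append(t)

    found = []
    find_terminals(rules, {}, 'START', found)
    print(sorted(set(found)))   # ['five', 'phrase', 'sentence']

Anything that appears on a right-hand side without being a nonterminal itself (and is not one of the pseudo-tokens END, ANY, or the |- start marker) is a terminal. Collecting these inside the parser is what lets the patch drop the second SingleInputParser construction in main.py and the get_terminals() helper in lm.py.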