From a12c1899def7690aa3d600bec0de75e5866d8b72 Mon Sep 17 00:00:00 2001 From: Joris Van Looveren Date: Fri, 19 Oct 2018 20:35:20 +0200 Subject: [PATCH 1/2] Subgrammars refactored in mixin classes --- grammar/grammars/__init__.py | 1 + grammar/grammars/english.py | 60 ++++++++++ grammar/grammars/numbers.py | 151 ++++++++++++++++++++++++ grammar/parse.py | 223 +++++------------------------------ 4 files changed, 239 insertions(+), 196 deletions(-) create mode 100644 grammar/grammars/__init__.py create mode 100644 grammar/grammars/english.py create mode 100644 grammar/grammars/numbers.py diff --git a/grammar/grammars/__init__.py b/grammar/grammars/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/grammar/grammars/__init__.py @@ -0,0 +1 @@ + diff --git a/grammar/grammars/english.py b/grammar/grammars/english.py new file mode 100644 index 0000000..04abc38 --- /dev/null +++ b/grammar/grammars/english.py @@ -0,0 +1,60 @@ +from ast import AST + +class EnglishGrammarMixin(object): + + def __init__(self, *args, **kwargs): + # register the three top-level rules of this subgrammar on the main grammar; add_subgrammar is supplied by CoreParser, which mixes this class in + self.add_subgrammar(["english", "word_sentence", "word_phrase"]) + + + def p_english(self, args): + ''' + english ::= word ANY + ''' + return AST('sequence', [ args[1].extra ]) + + def p_word_sentence(self, args): + ''' + word_sentence ::= sentence word_repeat + ''' + if(len(args[1].children) > 0): + args[1].children[0].meta = args[1].children[0].meta.capitalize() # capitalize the first child of the word sequence, i.e. the first dictated word + return args[1] + + def p_word_phrase(self, args): + ''' + word_phrase ::= phrase word_repeat + ''' + return args[1] + + def p_word_repeat(self, args): + ''' + word_repeat ::= raw_word + word_repeat ::= raw_word word_repeat + ''' + if(len(args) == 1): + return AST('word_sequence', None, + [ AST('null', args[0]) ]) + else: + args[1].children.insert(0, AST('null', args[0])) # prepend this word so the right-recursive rule still yields the words in spoken order + return args[1] + + def p_raw_word(self, args): + ''' + raw_word ::= ANY + raw_word ::= zero + raw_word ::= one + raw_word ::= two + 
raw_word ::= three + raw_word ::= four + raw_word ::= five + raw_word ::= six + raw_word ::= seven + raw_word ::= eight + raw_word ::= nine + raw_word ::= to + raw_word ::= for + ''' + if(args[0].type == 'ANY'): + return args[0].extra + return args[0].type diff --git a/grammar/grammars/numbers.py b/grammar/grammars/numbers.py new file mode 100644 index 0000000..81dbd97 --- /dev/null +++ b/grammar/grammars/numbers.py @@ -0,0 +1,151 @@ +from ast import AST + + +class NumberGrammarMixin(object): + + def __init__(self, *args, **kwargs): + # register this subgrammar on the main grammar in CoreParser + # "number_rule" is the name of the 'top-level' rule in this + # subgrammar (see method 'p_number_rule') + self.add_subgrammar(["number_rule"]) + + small_numbers = { + 'zero' : 0, + 'one' : 1, + 'two' : 2, + 'three' : 3, + 'four' : 4, + 'five' : 5, + 'six' : 6, + 'seven' : 7, + 'eight' : 8, + 'nine' : 9, + 'ten' : 10, + 'eleven' : 11, + 'twelve' : 12, + 'thirteen' : 13, + 'fourteen' : 14, + 'fifteen' : 15, + 'sixteen' : 16, + 'seventeen' : 17, + 'eighteen' : 18, + 'nineteen' : 19, + + # sadly, kaldi often recognizes these by accident + 'to' : 2, + 'for' : 4, + } + def p_number_rule(self, args): + ''' + number_rule ::= number number_set + number_rule ::= number thousand_number_set + number_rule ::= number million_number_set + number_rule ::= number billion_number_set + ''' + return AST('sequence', [ str(args[1]) ]) # args[0] is the 'number' keyword; args[1] is the computed value, emitted as text + def p_number_set(self, args): + ''' + number_set ::= _firstnumbers + number_set ::= _tens + number_set ::= _tens _ones + number_set ::= _hundreds + number_set ::= _hundreds _firstnumbers + number_set ::= _hundreds _tens + number_set ::= _hundreds _tens _ones + ''' + return sum(args) # every symbol in these rules yields an int, so the value is their sum + def p__ones(self, args): + ''' + _ones ::= one + _ones ::= two + _ones ::= three + _ones ::= four + _ones ::= five + _ones ::= six + _ones ::= seven + _ones ::= eight + _ones ::= nine + _ones ::= to + _ones ::= for + ''' + return self.small_numbers[args[0].type] + def 
p__firstnumbers(self, args): + ''' + _firstnumbers ::= zero + _firstnumbers ::= one + _firstnumbers ::= two + _firstnumbers ::= three + _firstnumbers ::= four + _firstnumbers ::= five + _firstnumbers ::= six + _firstnumbers ::= seven + _firstnumbers ::= eight + _firstnumbers ::= nine + _firstnumbers ::= ten + _firstnumbers ::= eleven + _firstnumbers ::= twelve + _firstnumbers ::= thirteen + _firstnumbers ::= fourteen + _firstnumbers ::= fifteen + _firstnumbers ::= sixteen + _firstnumbers ::= seventeen + _firstnumbers ::= eighteen + _firstnumbers ::= nineteen + _firstnumbers ::= to + _firstnumbers ::= for + ''' + return self.small_numbers[args[0].type] # the token type is the spoken word; look it up in the class-level table + def p__tens(self, args): + ''' + _tens ::= twenty + _tens ::= thirty + _tens ::= forty + _tens ::= fifty + _tens ::= sixty + _tens ::= seventy + _tens ::= eighty + _tens ::= ninety + ''' + value = { + 'twenty' : 20, + 'thirty' : 30, + 'forty' : 40, + 'fifty' : 50, + 'sixty' : 60, + 'seventy' : 70, + 'eighty' : 80, + 'ninety' : 90 + } + return value[args[0].type] + def p__hundreds(self, args): + ''' + _hundreds ::= _ones hundred + ''' + return args[0] * 100 # args[0] is the int produced for the _ones symbol + def p_thousand_number_set(self, args): + ''' + thousand_number_set ::= number_set thousand + thousand_number_set ::= number_set thousand number_set + ''' + total = args[0] * 1000 + if len(args) > 2: total += args[2] # three-symbol form: add the remainder that follows 'thousand' + return total + def p_million_number_set(self, args): + ''' + million_number_set ::= number_set million + million_number_set ::= number_set million number_set + million_number_set ::= number_set million thousand_number_set + ''' + total = args[0] * 1000000 + if len(args) > 2: total += args[2] # three-symbol form: add the remainder that follows 'million' + return total + def p_billion_number_set(self, args): + ''' + billion_number_set ::= number_set billion + billion_number_set ::= number_set billion number_set + billion_number_set ::= number_set billion thousand_number_set + billion_number_set ::= number_set billion million_number_set + ''' + total = args[0] * 1000000000 + if len(args) > 2: total += args[2] # three-symbol form: add the remainder that follows 'billion' + 
return total diff --git a/grammar/parse.py b/grammar/parse.py index 5e2049f..0545e85 100644 --- a/grammar/parse.py +++ b/grammar/parse.py @@ -4,16 +4,42 @@ from spark import GenericASTBuilder from ast import AST +# subgrammars are loaded from the grammars/ subpackage +from grammars import numbers +from grammars import english + class GrammaticalError(Exception): def __init__(self, string): self.string = string def __str__(self): return self.string -class CoreParser(GenericParser): +# include the subgrammars as mixin classes. This makes all the rule +# functions accessible in the CoreParser (and the actual parser in +# GenericParser), and allows the subgrammar classes to call +# 'add_subgrammar' from their __init__ methods. +class CoreParser(GenericParser, + numbers.NumberGrammarMixin, + english.EnglishGrammarMixin): + def __init__(self, start): + # call __init__ on all mixin classes + # skip GenericParser (matched by class identity, not by comparing name strings with 'is'); it is initialised last, below, once every mixin has registered its rules + for base in CoreParser.__bases__: + if base is not GenericParser: + base.__init__(self, start) + # now set up the actual parser GenericParser.__init__(self, start) + # this method will be called by any subgrammars that want + # to hook themselves into the main grammar. + # this adds one or more rules to the 'single_command' ruleset. 
+ def add_subgrammar(self, subgrammar_start_rule_names): + func = CoreParser.__dict__['p_single_command'] # the function object stored on the class; its docstring holds the single_command rules + for start_rule_name in subgrammar_start_rule_names: + if start_rule_name not in func.__doc__.split(): func.__doc__ += ("single_command ::= " + start_rule_name + "\n") # the docstring is shared by every parser instance, so only add names that are not registered yet + + + def typestring(self, token): return token.type @@ -36,14 +62,10 @@ def p_single_command(self, args): ''' single_command ::= letter single_command ::= sky_letter - single_command ::= number_rule single_command ::= movement single_command ::= character single_command ::= editing single_command ::= modifiers - single_command ::= english - single_command ::= word_sentence - single_command ::= word_phrase ''' return args[0] @@ -71,146 +93,6 @@ def p_repeat(self, args): else: return None - small_numbers = { - 'zero' : 0, - 'one' : 1, - 'two' : 2, - 'three' : 3, - 'four' : 4, - 'five' : 5, - 'six' : 6, - 'seven' : 7, - 'eight' : 8, - 'nine' : 9, - 'ten' : 10, - 'eleven' : 11, - 'twelve' : 12, - 'thirteen' : 13, - 'fourteen' : 14, - 'fifteen' : 15, - 'sixteen' : 16, - 'seventeen' : 17, - 'eighteen' : 18, - 'nineteen' : 19, - - # sadly, kaldi often recognizes these by accident - 'to' : 2, - 'for' : 4, - } - def p_number_rule(self, args): - ''' - number_rule ::= number number_set - number_rule ::= number thousand_number_set - number_rule ::= number million_number_set - number_rule ::= number billion_number_set - ''' - return AST('sequence', [ str(args[1]) ]) - def p_number_set(self, args): - ''' - number_set ::= _firstnumbers - number_set ::= _tens - number_set ::= _tens _ones - number_set ::= _hundreds - number_set ::= _hundreds _firstnumbers - number_set ::= _hundreds _tens - number_set ::= _hundreds _tens _ones - ''' - return sum(args) - def p__ones(self, args): - ''' - _ones ::= one - _ones ::= two - _ones ::= three - _ones ::= four - _ones ::= five - _ones ::= six - _ones ::= seven - _ones ::= eight - _ones ::= nine - _ones ::= to - _ones ::= for - ''' - return self.small_numbers[args[0].type] - def p__firstnumbers(self, args): - ''' - 
_firstnumbers ::= zero - _firstnumbers ::= one - _firstnumbers ::= two - _firstnumbers ::= three - _firstnumbers ::= four - _firstnumbers ::= five - _firstnumbers ::= six - _firstnumbers ::= seven - _firstnumbers ::= eight - _firstnumbers ::= nine - _firstnumbers ::= ten - _firstnumbers ::= eleven - _firstnumbers ::= twelve - _firstnumbers ::= thirteen - _firstnumbers ::= fourteen - _firstnumbers ::= fifteen - _firstnumbers ::= sixteen - _firstnumbers ::= seventeen - _firstnumbers ::= eighteen - _firstnumbers ::= nineteen - _firstnumbers ::= to - _firstnumbers ::= for - ''' - return self.small_numbers[args[0].type] - def p__tens(self, args): - ''' - _tens ::= twenty - _tens ::= thirty - _tens ::= forty - _tens ::= fifty - _tens ::= sixty - _tens ::= seventy - _tens ::= eighty - _tens ::= ninety - ''' - value = { - 'twenty' : 20, - 'thirty' : 30, - 'forty' : 40, - 'fifty' : 50, - 'sixty' : 60, - 'seventy' : 70, - 'eighty' : 80, - 'ninety' : 90 - } - return value[args[0].type] - def p__hundreds(self, args): - ''' - _hundreds ::= _ones hundred - ''' - return args[0] * 100 - def p_thousand_number_set(self, args): - ''' - thousand_number_set ::= number_set thousand - thousand_number_set ::= number_set thousand number_set - ''' - total = args[0] * 1000 - if len(args) > 2: total += args[2] - return total - def p_million_number_set(self, args): - ''' - million_number_set ::= number_set million - million_number_set ::= number_set million number_set - million_number_set ::= number_set million thousand_number_set - ''' - total = args[0] * 1000000 - if len(args) > 2: total += args[2] - return total - def p_billion_number_set(self, args): - ''' - billion_number_set ::= number_set billion - billion_number_set ::= number_set billion number_set - billion_number_set ::= number_set billion thousand_number_set - billion_number_set ::= number_set billion million_number_set - ''' - total = args[0] * 1000000000 - if len(args) > 2: total += args[2] - return total def p_sky_letter(self, 
args): ''' @@ -347,57 +229,6 @@ def p_modifiers(self, args): else: return AST('mod_plus_key', [ value[args[0].type] ], [ args[1] ] ) - def p_english(self, args): - ''' - english ::= word ANY - ''' - return AST('sequence', [ args[1].extra ]) - - def p_word_sentence(self, args): - ''' - word_sentence ::= sentence word_repeat - ''' - if(len(args[1].children) > 0): - args[1].children[0].meta = args[1].children[0].meta.capitalize() - return args[1] - - def p_word_phrase(self, args): - ''' - word_phrase ::= phrase word_repeat - ''' - return args[1] - - def p_word_repeat(self, args): - ''' - word_repeat ::= raw_word - word_repeat ::= raw_word word_repeat - ''' - if(len(args) == 1): - return AST('word_sequence', None, - [ AST('null', args[0]) ]) - else: - args[1].children.insert(0, AST('null', args[0])) - return args[1] - - def p_raw_word(self, args): - ''' - raw_word ::= ANY - raw_word ::= zero - raw_word ::= one - raw_word ::= two - raw_word ::= three - raw_word ::= four - raw_word ::= five - raw_word ::= six - raw_word ::= seven - raw_word ::= eight - raw_word ::= nine - raw_word ::= to - raw_word ::= for - ''' - if(args[0].type == 'ANY'): - return args[0].extra - return args[0].type class SingleInputParser(CoreParser): def __init__(self): From 655088c53890e3656cdbcd22a52976a38bd1a03b Mon Sep 17 00:00:00 2001 From: Van Looveren Date: Mon, 22 Oct 2018 16:01:16 +0200 Subject: [PATCH 2/2] Add 'yes-no' subgrammar --- grammar/grammars/yesno.py | 16 ++++++++++++++++ grammar/parse.py | 4 +++- 2 files changed, 19 insertions(+), 1 deletion(-) create mode 100644 grammar/grammars/yesno.py diff --git a/grammar/grammars/yesno.py b/grammar/grammars/yesno.py new file mode 100644 index 0000000..debb885 --- /dev/null +++ b/grammar/grammars/yesno.py @@ -0,0 +1,16 @@ +from ast import AST + +class YesNoGrammarMixin(object): + + def __init__(self, *args, **kwargs): + # register the single top-level rule of this subgrammar, 'yesno', on the main grammar; add_subgrammar is supplied by CoreParser, which mixes this class in + self.add_subgrammar(["yesno"]) + + + def p_yesno(self, args): 
+ ''' + yesno ::= yes + yesno ::= no + ''' + return AST('sequence', [ args[0].type ]) # the token type is the recognised word itself, 'yes' or 'no' + diff --git a/grammar/parse.py b/grammar/parse.py index 0545e85..2a93b88 100644 --- a/grammar/parse.py +++ b/grammar/parse.py @@ -7,6 +7,7 @@ # subgrammars are loaded from the grammars/ subpackage from grammars import numbers from grammars import english +from grammars import yesno class GrammaticalError(Exception): def __init__(self, string): @@ -20,7 +21,8 @@ def __str__(self): # 'add_subgrammar' from their __init__ methods. class CoreParser(GenericParser, numbers.NumberGrammarMixin, - english.EnglishGrammarMixin): + english.EnglishGrammarMixin, + yesno.YesNoGrammarMixin): def __init__(self, start): # call __init__ on all mixin classes