Skip to content

Commit

Permalink
Change some terminals to rules in the grammar
Browse files Browse the repository at this point in the history
  • Loading branch information
pcoccoli committed Jul 17, 2023
1 parent c0eca45 commit a1283dd
Show file tree
Hide file tree
Showing 2 changed files with 99 additions and 48 deletions.
58 changes: 25 additions & 33 deletions src/kestrel/syntax/kestrel.lark
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ start: statement*

statement: assignment
| command_no_result

// If no VARIABLE is given, default to _ in post-parsing
// For assign or merge, the result variable is required
// This eliminates meaningless huntflows like `var1 var2 var3`
Expand Down Expand Up @@ -46,7 +46,7 @@ join: "JOIN"i VARIABLE "," VARIABLE (BY ATTRIBUTE "," ATTRIBUTE)?

load: "LOAD"i stdpath ("AS"i ENTITY_TYPE)?

new: "NEW"i ENTITY_TYPE? VAR_DATA
new: "NEW"i ENTITY_TYPE? var_data

sort: "SORT"i VARIABLE BY ATTRIBUTE (ASC|DESC)?

Expand Down Expand Up @@ -104,8 +104,8 @@ offset_clause: "OFFSET"i INT
| comparison_null
| "(" disjunction ")"

comparison_std: ENTITY_ATTRIBUTE_PATH OP value
comparison_null: ENTITY_ATTRIBUTE_PATH NULL_OP NULL
comparison_std: ENTITY_ATTRIBUTE_PATH op value
comparison_null: ENTITY_ATTRIBUTE_PATH null_op NULL

//
// Timespan
Expand All @@ -114,15 +114,15 @@ comparison_null: ENTITY_ATTRIBUTE_PATH NULL_OP NULL
?timespan: "start"i timestamp "stop"i timestamp -> timespan_absolute
| "last"i INT timeunit -> timespan_relative

?timeunit: DAY
| HOUR
| MINUTE
| SECOND
?timeunit: day
| hour
| minute
| second

DAY: "days"i | "day"i | "d"i
HOUR: "hours"i | "hour"i | "h"i
MINUTE: "minutes"i | "minute"i | "m"i
SECOND: "seconds"i | "second"i | "s"i
day: "days"i | "day"i | "d"i
hour: "hours"i | "hour"i | "h"i
minute: "minutes"i | "minute"i | "m"i
second: "seconds"i | "second"i | "s"i

timestamp: ISOTIMESTAMP
| "\"" ISOTIMESTAMP "\""
Expand Down Expand Up @@ -189,17 +189,15 @@ ANALYTICS_ESCAPED: PATH_ESCAPED
// Two-level JSON in command NEW
//

// use terminal to load the entire VAR_DATA without parsing into it
// add `WS*` since `%ignore WS` doesn't apply to spaces inside terminals
// https://github.com/lark-parser/lark/issues/99
VAR_DATA: "[" (RAW_VALUES | JSON_OBJS) "]"
// var_data is a rule: RAW_VALUES is still loaded whole as a terminal, while json_objs is parsed into objects
var_data: "[" (RAW_VALUES | json_objs) "]"

RAW_VALUES: ESCAPED_STRING_WS ("," ESCAPED_STRING_WS)*

JSON_OBJS: JSON_OBJ ("," JSON_OBJ)*
JSON_OBJ: WS* "{" JSON_PAIR ("," JSON_PAIR)* "}" WS*
JSON_PAIR: ESCAPED_STRING_WS ":" JSON_VALUE
JSON_VALUE: WS* (NUMBER|ESCAPED_STRING|TRUE|FALSE|NULL) WS*
json_objs: json_obj ("," json_obj)*
json_obj: "{" json_pair ("," json_pair)* "}"
json_pair: ESCAPED_STRING ":" json_value
json_value: (NUMBER|ESCAPED_STRING|TRUE|FALSE|NULL)

//
// Arguments
Expand Down Expand Up @@ -228,24 +226,18 @@ NOT: "NOT"i
ISSUBSET: "ISSUBSET"i
ISSUPERSET: "ISSUPERSET"i

OP: OP_SIGN
| (NOT WS+)? OP_KEYWORD
op: OP_SIGN
| NOT? op_keyword

OP_SIGN: "="
| "=="
| "!="
| ">"
| "<"
| ">="
| ">="
OP_SIGN: /([!=]?=|[<>]=?)/

OP_KEYWORD: IN
op_keyword: IN
| LIKE
| MATCHES
| ISSUBSET
| ISSUPERSET

NULL_OP: IS (WS+ NOT)?
null_op: IS (NOT)?

//
// Common language constructs
Expand All @@ -263,7 +255,7 @@ literal_list: "(" literal ("," literal)* ")"

reference_or_simple_string: ECNAME ("." ATTRIBUTE)?

string: ADVANCED_STRING
?string: advanced_string

number: NUMBER

Expand Down Expand Up @@ -295,7 +287,7 @@ SIMPLE_STRING: ECNAME

// nearly Python string, but no [ubf]? as prefix options
// check Lark example of Python parser for reference
ADVANCED_STRING: /(r?)("(?!"").*?(?<!\\)(\\\\)*?"|'(?!'').*?(?<!\\)(\\\\)*?')/
advanced_string: /(r?)("(?!"").*?(?<!\\)(\\\\)*?"|'(?!'').*?(?<!\\)(\\\\)*?')/

%import common (LETTER, DIGIT, WS, INT, WORD, NUMBER, CNAME, _STRING_ESC_INNER)
%import common.SH_COMMENT -> COMMENT
Expand Down
89 changes: 74 additions & 15 deletions src/kestrel/syntax/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,10 +195,20 @@ def save(self, args):
return packet

def new(self, args):
if len(args) == 1:
# Try to get entity type from first entity
data = args[0]
if isinstance(data, list):
entity_type = data[0].get("type")
else:
entity_type = None
else:
entity_type = _first(args)
data = args[1]
return {
"command": "new",
"type": self._extract_entity_type(args),
"data": self._assert_and_extract_single("VAR_DATA", args),
"type": entity_type,
"data": data,
}

def expression(self, args):
Expand Down Expand Up @@ -236,13 +246,13 @@ def expression_and(self, args):
def comparison_std(self, args):
etype, attr = _extract_entity_and_attribute(args[0].value)
# remove more than one spaces; capitalize op
op = " ".join(_second(args).split()).upper()
op = args[1]
value = args[2]
return ECGPComparison(attr, op, value, etype)

def comparison_null(self, args):
etype, attr = _extract_entity_and_attribute(args[0].value)
op = _second(args)
op = args[1]
if "NOT" in op:
op = "!="
else:
Expand Down Expand Up @@ -271,12 +281,9 @@ def reference_or_simple_string(self, args):
v = _first(args)
return v

def string(self, args):
def advanced_string(self, args):
raw = _first(args)
if args[0].type == self.token_prefix + "SIMPLE_STRING":
value = raw
elif args[0].type == self.token_prefix + "ADVANCED_STRING":
value = unescape_quoted_string(raw)
value = unescape_quoted_string(raw)
return value

def number(self, args):
Expand Down Expand Up @@ -311,13 +318,13 @@ def offset_clause(self, args):
def timespan_relative(self, args):
num = int(args[0])
unit = args[1]
if unit.type == self.token_prefix + "DAY":
if unit == "DAY":
delta = timedelta(days=num)
elif unit.type == self.token_prefix + "HOUR":
elif unit == "HOUR":
delta = timedelta(hours=num)
elif unit.type == self.token_prefix + "MINUTE":
elif unit == "MINUTE":
delta = timedelta(minutes=num)
elif unit.type == self.token_prefix + "SECOND":
elif unit == "SECOND":
delta = timedelta(seconds=num)
stop = datetime.utcnow()
start = stop - delta
Expand All @@ -328,8 +335,48 @@ def timespan_absolute(self, args):
stop = to_datetime(args[1])
return {"timerange": (start, stop)}

def day(self, _args):
    """Callback for the ``day`` grammar rule.

    The children are ignored; the constant returned here is the value that
    ``timespan_relative`` compares its unit against.
    """
    return "DAY"

def hour(self, _args):
    """Callback for the ``hour`` grammar rule.

    The children are ignored; the constant returned here is the value that
    ``timespan_relative`` compares its unit against.
    """
    return "HOUR"

def minute(self, _args):
    """Callback for the ``minute`` grammar rule.

    The children are ignored; the constant returned here is the value that
    ``timespan_relative`` compares its unit against.
    """
    return "MINUTE"

def second(self, _args):
    """Callback for the ``second`` grammar rule.

    The children are ignored; the constant returned here is the value that
    ``timespan_relative`` compares its unit against.
    """
    return "SECOND"

def timestamp(self, args):
return self._assert_and_extract_single("ISOTIMESTAMP", args)
return args[0]

def var_data(self, args):
    """Return the payload of a ``var_data`` node.

    A child that is not a ``Token`` (the already-transformed ``json_objs``
    alternative) is passed through untouched. A ``Token`` child (the
    ``RAW_VALUES`` alternative) gets the surrounding brackets put back,
    because the rule itself consumed them.
    """
    head = args[0]
    if not isinstance(head, Token):
        return head
    # Restore the brackets that the grammar rule matched as literals.
    return "[" + _first(args) + "]"

def json_objs(self, args):
    """Return the children of a ``json_objs`` node unchanged.

    The same list object that was passed in is handed back.
    """
    return args

def json_obj(self, args):
    """Build a dict from the ``(key, value)`` pairs of a ``json_obj`` node.

    When a key is repeated, the last pair wins.
    """
    return {key: value for key, value in args}

def json_pair(self, args):
    """Return a ``(key, value)`` tuple for a ``json_pair`` node.

    The key is unescaped when its token type contains ``ESCAPED_STRING``;
    the value (second child) is passed through as is.
    """
    key = _first(args)
    is_quoted = "ESCAPED_STRING" in args[0].type
    return (unescape_quoted_string(key) if is_quoted else key), args[1]

def json_value(self, args):
    """Convert the single token of a ``json_value`` node to a Python value.

    Quoted strings are unescaped and NUMBER tokens become ``int`` or
    ``float``. Any other token is returned as whatever ``_first`` yields.
    NOTE(review): per the grammar that covers TRUE, FALSE and NULL; confirm
    whether they should map to ``True``/``False``/``None`` instead.
    """
    token = args[0]
    v = _first(args)
    if "ESCAPED_STRING" in token.type:
        v = unescape_quoted_string(v)
    elif token.type == self.token_prefix + "NUMBER":
        # NUMBER also matches exponent-form floats such as ``1e5`` that have
        # no ".", so keying on the decimal point sent them to int() and
        # raised ValueError. Try int first and fall back to float.
        try:
            v = int(v)
        except ValueError:
            v = float(v)
    return v

def entity_type(self, args):
return _first(args)
Expand Down Expand Up @@ -374,7 +421,7 @@ def bin_func(self, args):
attr = _first(args)
num = int(_second(args))
if len(args) >= 3:
unit = _third(args)
unit = args[2][0] # Only pass 1st letter (d, h, m, or s)
else:
unit = None
alias = f"{attr}_bin"
Expand All @@ -398,6 +445,18 @@ def args(self, args):
def arg_kv_pair(self, args):
    """Return a one-entry dict mapping the first child to the second."""
    key = _first(args)
    value = args[1]
    return {key: value}

def op(self, args):
    """Join the upper-cased children of an ``op`` node with single spaces.

    For example, the children ``not`` and ``in`` become ``"NOT IN"``.
    """
    return " ".join(part.upper() for part in args)

def op_sign(self, args):
    """Return the first child of an ``op_sign`` node, or ``""`` if it has none.

    NOTE(review): the grammar declares ``OP_SIGN`` as a terminal; confirm
    that this rule-style callback is still reachable.
    """
    if not args:
        return ""
    return _first(args)

def op_keyword(self, args):
    """Return the first child of an ``op_keyword`` node."""
    keyword = _first(args)
    return keyword

def null_op(self, args):
    """Join the upper-cased children of a ``null_op`` node with single spaces.

    For example, the children ``is`` and ``not`` become ``"IS NOT"``, which
    ``comparison_null`` then checks for the substring ``"NOT"``.
    """
    return " ".join(part.upper() for part in args)

def _extract_vars(self, args):
var_names = []
for arg in args:
Expand Down

0 comments on commit a1283dd

Please sign in to comment.