Skip to content

Commit

Permalink
Merge pull request #459 from opencybersecurityalliance/k2-irg-sourcehop
Browse files Browse the repository at this point in the history
K2 irg sourcehop
  • Loading branch information
subbyte authored Jan 16, 2024
2 parents 83faa6c + ab243ee commit 5c8946b
Show file tree
Hide file tree
Showing 5 changed files with 105 additions and 4 deletions.
2 changes: 1 addition & 1 deletion packages-nextgen/kestrel_core/src/kestrel/config/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,4 +41,4 @@ def load_config():


if __name__ == "__main__":
...
...
6 changes: 5 additions & 1 deletion packages-nextgen/kestrel_core/src/kestrel/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,10 @@ class MultiInterfacesInGraph(KestrelError):
pass


class MultiSourcesInGraph(KestrelError):
    """Raised when a graph that may hold only one source node has several."""

    pass


class InvalidSerializedDatasourceInterfaceCacheCatalog(KestrelError):
pass

Expand All @@ -71,4 +75,4 @@ class InterfaceNotFound(KestrelError):


class InterfaceNameCollision(KestrelError):
pass
pass
55 changes: 55 additions & 0 deletions packages-nextgen/kestrel_core/src/kestrel/ir/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
DuplicatedDataSource,
DuplicatedSingletonInstruction,
MultiInterfacesInGraph,
MultiSourcesInGraph,
InevaluableInstruction,
)
from kestrel.config.internal import CACHE_INTERFACE_IDENTIFIER
Expand Down Expand Up @@ -481,6 +482,43 @@ def find_dependent_subgraphs_of_node(

return dep_graphs

def find_simple_query_subgraphs(self) -> Iterable[IRGraphSimpleQuery]:
    """Find the dependency subgraphs that are IRGraphSimpleQuery

    Some interfaces, e.g., Elasticsearch/OpenSearch, do not support JOIN or
    sub query/SELECT, so they can only evaluate a simple SQL query around
    each source node. Use this method to prepare such tiny graph segments
    for evaluation by the interface. The remainder of the graph can be
    evaluated in cache.

    Every path found from a source node to one of its closest variables is
    wrapped in its own IRGraphSimpleQuery, so one source node may
    contribute several subgraphs.

    Returns:
        An iterator of simple-query subgraphs
    """

    # TODO: non-linear graph including referred nodes (need backtracking)

    for source in self.get_nodes_by_type(SourceInstruction):
        paths = self._find_paths_from_node_to_a_variable(source)
        # wrap lazily, one path at a time, as the paths are discovered
        yield from map(IRGraphSimpleQuery, paths)

def _find_paths_from_node_to_a_variable(
    self, node: Instruction
) -> Iterable[IRGraph]:
    """Find linear IRGraphs (paths) from the starting node to its closest variables

    The search follows successors of `node` and stops descending along a
    branch as soon as a Variable is reached. A branch that ends without
    reaching any Variable yields nothing.

    Parameters:
        node: the node to start path search

    Returns:
        An iterator of linear IRGraphs, each containing `node`, the
        Variable that ends the path, and every node in between
    """
    for child in self.successors(node):
        if not isinstance(child, Variable):
            # keep walking: extend every path found below `child` with `node`
            for tail in self._find_paths_from_node_to_a_variable(child):
                yield self.subgraph([*tail.nodes(), node])
            continue
        # closest variable reached: the path is just these two nodes
        yield self.subgraph([child, node])

def to_dict(self) -> Mapping[str, Iterable[Mapping]]:
"""Serialize to a Python dictionary (D3 graph format)
Expand Down Expand Up @@ -653,3 +691,20 @@ def _add_node(self, node: Instruction, deref: bool = True) -> Instruction:
else:
self.interface = node.interface
return super()._add_node(node, deref)


@typechecked
class IRGraphSimpleQuery(IRGraphEvaluable):
    """Simple Query IRGraph

    A simple query IRGraph is an evaluable IRGraph that

    1. Contains one source node (construction rejects any graph holding
       more than one)

    2. Can be compiled into a simple (not nested/joined) SQL query
    """

    def __init__(self, graph: IRGraph):
        """Build a simple-query graph from `graph`

        Parameters:
            graph: the graph to wrap

        Raises:
            MultiSourcesInGraph: if `graph` has more than one source node
        """
        sources = graph.get_nodes_by_type(SourceInstruction)
        if len(sources) > 1:
            raise MultiSourcesInGraph()
        super().__init__(graph)
6 changes: 5 additions & 1 deletion packages-nextgen/kestrel_core/src/kestrel/ir/instructions.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,11 @@ class IntermediateInstruction(Instruction):

@dataclass(eq=False)
class Return(TransformingInstruction):
"""The sink instruction that forces execution"""
"""The sink instruction that forces execution
Return is implemented as a TransformingInstruction so it triggers
IRGraph._add_node_with_dependent_node() in IRGraph.add_node()
"""

# the order/sequence of return instruction in huntflow (source code)
sequence: int = 0
Expand Down
40 changes: 39 additions & 1 deletion packages-nextgen/kestrel_core/tests/test_ir_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
CACHE_INTERFACE_IDENTIFIER,
)
from kestrel.ir.filter import StrComparison, StrCompOp
from kestrel.ir.graph import IRGraph
from kestrel.ir.graph import IRGraph, IRGraphSimpleQuery
from kestrel.frontend.parser import parse_kestrel
from kestrel.cache import InMemoryCache

Expand Down Expand Up @@ -345,3 +345,41 @@ def test_find_dependent_subgraphs_of_node():
assert graph.get_variable("p31") in gs[0]
assert p5 in gs[0]
assert ret in gs[0]


def test_find_simple_query_subgraphs():
    """Each GET statement yields exactly one simple-query subgraph.

    The test was previously named `test_find_dependent_subgraphs_of_node`,
    the same as the test defined just above it in this module. The later
    definition replaced the earlier one, so pytest never ran the earlier
    test. The name now matches the method under test.
    """
    huntflow = """
p1 = GET process FROM elastic://edr1
WHERE name = "cmd.exe"
LAST 5 DAYS
p2 = GET process FROM elastic://edr1
WHERE pid = 999
LAST 30 MINUTES
p3 = GET process FROM stixshifter://edr2
WHERE parent_ref.name = "powershell.exe"
LAST 24 HOURS
p4 = GET process FROM stixshifter://edr2
WHERE command_line LIKE "%powershell.exe%"
LAST 1 HOURS
p11 = p1 WHERE pid = 999
p12 = p1 WHERE pid = 888
p21 = p2 WHERE name = "cmd.exe"
p22 = p2 WHERE name = "powershell.exe"
DISP p1 ATTR name
DISP p12 ATTR name
"""
    graph = parse_kestrel(huntflow)
    # variables assigned directly from a GET; each must be seen exactly once
    remaining = {"p1", "p2", "p3", "p4"}
    expected_node_types = Counter([Variable, Filter, ProjectEntity, DataSource])
    for g in graph.find_simple_query_subgraphs():
        assert isinstance(g, IRGraphSimpleQuery)
        # a linear path of four nodes: source -> project -> filter -> variable
        assert Counter(map(type, g.nodes())) == expected_node_types
        assert len(g.edges()) == 3
        varname = g.get_variables()[0].name
        assert varname in remaining
        # removing guards against the same variable being yielded twice
        remaining.remove(varname)
    assert remaining == set()

0 comments on commit 5c8946b

Please sign in to comment.