Skip to content

Commit

Permalink
Merge pull request #16 from ocsf/build-observables
Browse files Browse the repository at this point in the history
Build Observable Type ID Enumeration
  • Loading branch information
query-jeremy authored Jul 31, 2024
2 parents 898764a + dbd932a commit 5fe25e1
Show file tree
Hide file tree
Showing 12 changed files with 188 additions and 23 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "ocsf-lib"
version = "0.5.0"
version = "0.6.0"
description = "Tools for working with the OCSF schema"
authors = ["Jeremy Fisher <jeremy@query.ai>"]
readme = "README.md"
Expand Down
8 changes: 6 additions & 2 deletions src/ocsf/compare/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,12 @@
def main():
parser = ArgumentParser(description="Compare two OCSF schemata")

parser.add_argument("old_schema", help="Path to the old schema file, old schema repository, or the old schema version.")
parser.add_argument("new_schema", help="Path to the new schema file, new schema repository, or the new schema version.")
parser.add_argument(
"old_schema", help="Path to the old schema file, old schema repository, or the old schema version."
)
parser.add_argument(
"new_schema", help="Path to the new schema file, new schema repository, or the new schema version."
)
parser.add_argument(
"--expand-changes",
dest="collapse_changes",
Expand Down
4 changes: 3 additions & 1 deletion src/ocsf/compile/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
from .compiler import Compilation
from .options import CompilationOptions


def main():
parser = ArgumentParser(description="Compile an OCSF repository into a schema and dump it as JSON to STDOUT")
parser.add_argument("path", help="Path to the OCSF repository")
Expand Down Expand Up @@ -119,5 +120,6 @@ def main():

print(to_json(compiler.build()))


if __name__ == "__main__":
main()
main()
3 changes: 2 additions & 1 deletion src/ocsf/compile/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
from .planners.object_type import ObjectTypePlanner
from .planners.uid_names import UidSiblingPlanner
from .planners.datetime import DateTimePlanner
from .planners.observable import MarkObservablesPlanner
from .planners.observable import MarkObservablesPlanner, BuildObservableTypesPlanner
from .merge import MergeResult

FileOperations = dict[RepoPath, list[Operation]]
Expand Down Expand Up @@ -60,6 +60,7 @@ def __init__(self, repo: Repository, options: CompilationOptions = CompilationOp
MarkProfilePlanner(self._proto, options),
IncludePlanner(self._proto, options),
ExtendsPlanner(self._proto, options),
BuildObservableTypesPlanner(self._proto, options),
ExtensionMergePlanner(self._proto, options),
ExcludeProfileAttrsPlanner(self._proto, options),
],
Expand Down
130 changes: 124 additions & 6 deletions src/ocsf/compile/planners/observable.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
"""Assign the observable value to attributes in the compiled schema output based
"""Observable planners and operations.
Mark Observables
================
Assign the observable value to attributes in the compiled schema output based
on type and attribute name.
This will set the observable ID on all attributes with a type that has an
Expand All @@ -8,6 +12,11 @@
Producers and consumers wishing to build the observables attribute of records
can use this observable property to do so rather than implementing their own
logic to determine the correct observable type_id of attributes.
Build Observable Types
======================
Build the observable type_id enum based on values found in dictionary.json and
across objects and events.
"""

from dataclasses import dataclass
Expand All @@ -21,14 +30,19 @@
from ocsf.repository import (
DefinitionFile,
AttrDefn,
ObjectDefn,
EventDefn,
TypeDefn,
EnumMemberDefn,
DefnWithAttrs,
AnyDefinition,
SpecialFiles,
DictionaryDefn,
DictionaryTypesDefn,
)

# TODO build observable object's type_id enum


class _Registry:
"""A registry of observable attributes and types from the dictionary.json
Expand Down Expand Up @@ -110,15 +124,119 @@ def apply(self, schema: ProtoSchema) -> MergeResult:
return results


@dataclass(eq=True, frozen=True)
class BuildObservableTypeOp(Operation):
    """Add observable type_id enum members to the target object definition.

    The operation reads the definition at ``self.prerequisite`` and writes enum
    members into ``attributes["type_id"].enum`` of the definition at
    ``self.target``. The prerequisite is either the dictionary file or an
    object/event definition.
    """

    # Registry of observable attributes and types; required (asserted) when the
    # prerequisite is the dictionary file.
    registry: Optional[_Registry] = None

    def __str__(self):
        return f"Build observable types from {self.prerequisite}"

    def apply(self, schema: ProtoSchema) -> MergeResult:
        """Populate the target's ``type_id`` enum from the prerequisite file.

        Returns:
            One ``("attributes", "type_id", "enum", <enum id>)`` entry for each
            enum member written.

        Raises:
            ValueError: if no prerequisite was set on this operation.
            AssertionError: if the target or prerequisite definitions do not
                have the shape checked below.
        """
        if self.prerequisite is None:
            raise ValueError("Prerequisite is required")

        # The asserts below validate the target and narrow types for the
        # static type checker.
        target = schema[self.target].data
        assert isinstance(target, ObjectDefn)
        assert target.attributes is not None
        assert "type_id" in target.attributes
        assert isinstance(target.attributes["type_id"], AttrDefn)
        enum = target.attributes["type_id"].enum
        assert enum is not None

        results: MergeResult = []
        data = schema[self.prerequisite].data

        if self.prerequisite == SpecialFiles.DICTIONARY:
            assert self.registry is not None
            assert isinstance(data, DictionaryDefn)

            # Dictionary attribute observables. These are assigned
            # unconditionally, replacing any member already stored under the
            # same ID.
            assert isinstance(data.attributes, dict)
            attrs = self.registry.attrs()
            for key in attrs:
                enum_id = str(attrs[key])
                attr = data.attributes[key]
                assert isinstance(attr, AttrDefn)
                enum[enum_id] = EnumMemberDefn(
                    caption=attr.caption,
                    description=f"Observable by Dictionary Attribute.<br>{attr.description}",
                )
                results.append(("attributes", "type_id", "enum", enum_id))

            # Dictionary type observables (also assigned unconditionally).
            assert isinstance(data.types, DictionaryTypesDefn)
            assert isinstance(data.types.attributes, dict)

            types = self.registry.types()
            for key in types:
                enum_id = str(types[key])
                type_ = data.types.attributes[key]
                assert isinstance(type_, TypeDefn)
                enum[enum_id] = EnumMemberDefn(
                    caption=type_.caption,
                    description=f"Observable by Dictionary Type.<br>{type_.description}",
                )
                results.append(("attributes", "type_id", "enum", enum_id))

        elif isinstance(data, (EventDefn, ObjectDefn)):
            if isinstance(data, ObjectDefn) and data.observable is not None:
                # Walk up the chain of bases and remember the last (most
                # distant) ancestor that declares an observable; its caption
                # and description label the enum member.
                obj = self.prerequisite
                obj_data = data
                while (base := schema.find_base(obj)) is not None:
                    base_data = schema[base].data
                    obj = base
                    if isinstance(base_data, ObjectDefn) and base_data.observable is not None:
                        obj_data = base_data

                # Object observable
                enum_id = str(obj_data.observable)
                if enum_id not in enum:
                    enum[enum_id] = EnumMemberDefn(
                        caption=obj_data.caption,
                        description=f"Observable by Object.<br>{obj_data.description}",
                    )
                    results.append(("attributes", "type_id", "enum", enum_id))

            if isinstance(data.attributes, dict):
                # Object/Event attribute observable
                label = "Event" if isinstance(data, EventDefn) else "Object"
                for k, v in data.attributes.items():
                    if isinstance(v, AttrDefn) and v.observable is not None:
                        enum_id = str(v.observable)
                        if enum_id not in enum:  # Don't overwrite enum values defined in dictionary.json
                            enum[enum_id] = EnumMemberDefn(
                                caption=f"{data.caption} {label}: {k}",
                                description=f'Observable by {label}-Specific Attribute.<br>{label}-specific attribute "{k}" for the {data.caption} {label}.',
                            )
                            results.append(("attributes", "type_id", "enum", enum_id))

        return results


class MarkObservablesPlanner(Planner):
    """Plan a MarkObservablesOp for definitions that carry attributes."""

    def __init__(self, schema: ProtoSchema, options: CompilationOptions):
        super().__init__(schema, options)
        # One registry, built from the proto-schema, is shared by every
        # operation this planner emits.
        self._registry = _Registry(schema)

    def analyze(self, input: DefinitionFile[AnyDefinition]) -> Analysis:
        """Return a one-element plan for ``input``, or an empty plan.

        An operation is planned only when the ``set_observable`` option is
        exactly ``True`` and the file's data is a ``DefnWithAttrs``.
        """
        wanted = self._options.set_observable is True and isinstance(input.data, DefnWithAttrs)
        if not wanted:
            return []
        return [MarkObservablesOp(input.path, registry=self._registry)]


class BuildObservableTypesPlanner(Planner):
    """Plan BuildObservableTypeOp operations targeting the observable object."""

    def __init__(self, schema: ProtoSchema, options: CompilationOptions):
        super().__init__(schema, options)
        # Passed to every planned operation.
        self._registry = _Registry(schema)

    def analyze(self, input: DefinitionFile[AnyDefinition]) -> Analysis:
        """Return the operations needed for ``input``.

        A single BuildObservableTypeOp (target ``SpecialFiles.OBSERVABLE``,
        prerequisite ``input.path``) is planned when ``input`` is the
        dictionary file or holds an object or event definition. Any other
        input yields an empty plan. The result is always a list.
        """
        ops: Analysis = []

        if input.path == SpecialFiles.DICTIONARY or isinstance(input.data, (ObjectDefn, EventDefn)):
            ops.append(
                BuildObservableTypeOp(
                    target=SpecialFiles.OBSERVABLE,
                    prerequisite=input.path,
                    registry=self._registry,
                )
            )

        return ops
23 changes: 23 additions & 0 deletions src/ocsf/compile/protoschema.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,29 @@ def find_extension_path(self, name: str) -> str:

raise KeyError(f"Extension {name} not found")

def find_base(self, child: RepoPath, recurse: bool = False) -> RepoPath | None:
    """Find the path to the base object or event for object or event at a given path.

    Args:
        child: Path of the object or event definition to look up.
        recurse: When False (the default), return the immediate parent named
            by ``extends``. When True, keep following ``extends`` and return
            the first ancestor that does not itself extend anything.

    Returns:
        The path of the base definition, or None when the definition at
        ``child`` is neither an object nor an event, or has no ``extends``.
    """
    data = self[child].data

    if isinstance(data, ObjectDefn):
        if data.extends is None:
            return None
        parent = self.find_object(data.extends)
        assert isinstance(parent.data, ObjectDefn)
        if recurse and parent.data.extends is not None:
            # Propagate the flag so the walk continues all the way to the root
            # instead of stopping one level above the parent.
            return self.find_base(parent.path, recurse=True)
        return parent.path

    if isinstance(data, EventDefn):
        if data.extends is None:
            return None
        parent = self.find_event(data.extends)
        assert isinstance(parent.data, EventDefn)
        if recurse and parent.data.extends is not None:
            # Same propagation as in the object branch.
            return self.find_base(parent.path, recurse=True)
        return parent.path

    return None

def schema(self) -> OcsfSchema:
schema = OcsfSchema(version="0.0.0") # Version updated below

Expand Down
18 changes: 10 additions & 8 deletions src/ocsf/repository/__init__.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,29 @@
from .definitions import (
AnyDefinition,
DefinitionT,
IncludeTarget,
AttrDefn,
CategoriesDefn,
CategoryDefn,
DefinitionData,
DefinitionPart,
DefinitionT,
DefnWithAnnotations,
DefnWithAttrs,
DefnWithExtends,
DefnWithExtn,
DefnWithInclude,
DefnWithName,
DeprecationInfoDefn,
DictionaryDefn,
DictionaryTypesDefn,
EnumMemberDefn,
EventDefn,
ExtensionDefn,
IncludeDefn,
IncludeTarget,
ObjectDefn,
ProfileDefn,
TypeDefn,
VersionDefn,
DefnWithName,
DefnWithAttrs,
DefnWithInclude,
DefnWithAnnotations,
DefnWithExtn,
)
from .helpers import (
RepoPaths,
Expand Down Expand Up @@ -54,9 +55,10 @@
"DefinitionT",
"DefnWithAnnotations",
"DefnWithAttrs",
"DefnWithExtends",
"DefnWithExtn",
"DefnWithInclude",
"DefnWithName",
"DefnWithExtn",
"DeprecationInfoDefn",
"DictionaryDefn",
"DictionaryTypesDefn",
Expand Down
2 changes: 2 additions & 0 deletions src/ocsf/repository/definitions.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,3 +229,5 @@ class CategoriesDefn(DefinitionData):
This is only used for definitions that create new record types in the core schema. `dictionary.json` is exempt.
"""

DefnWithExtends = ObjectDefn | EventDefn
1 change: 1 addition & 0 deletions src/ocsf/repository/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ class SpecialFiles(StrEnum):
CATEGORIES = "categories.json"
VERSION = "version.json"
EXTENSION = "extension.json"
OBSERVABLE = "objects/observable.json"

@staticmethod
def contains(path: str) -> bool:
Expand Down
5 changes: 4 additions & 1 deletion src/ocsf/schema/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,10 @@
def main():
parser = ArgumentParser(description="Dump an OCSF schema as JSON to STDOUT")

parser.add_argument("schema", help="Path to a schema JSON file, a version identifier (to retrieve from schema.ocsf.io), or a path to an OCSF repository.")
parser.add_argument(
"schema",
help="Path to a schema JSON file, a version identifier (to retrieve from schema.ocsf.io), or a path to an OCSF repository.",
)
args = parser.parse_args()

print(to_json(get_schema(args.schema)))
Expand Down
9 changes: 6 additions & 3 deletions tests/ocsf/compile/planners/test_observable.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
)
from ocsf.compile import CompilationOptions
from ocsf.compile.protoschema import ProtoSchema
from ocsf.compile.planners.observable import MarkObservablesPlanner, MarkObservablesOp, _Registry
from ocsf.compile.planners.observable import MarkObservablesPlanner, MarkObservablesOp
from ocsf.compile.planners.observable import _Registry # type: ignore


def get_ps():
Expand Down Expand Up @@ -46,8 +47,10 @@ def test_analyze():
repo = get_ps()
planner = MarkObservablesPlanner(repo, CompilationOptions(set_observable=True))
analysis = planner.analyze(repo["objects/thing.json"])
assert isinstance(analysis, MarkObservablesOp)
assert analysis.target == "objects/thing.json"
assert isinstance(analysis, list)
assert len(analysis) == 1
assert isinstance(analysis[0], MarkObservablesOp)
assert analysis[0].target == "objects/thing.json"


def test_apply():
Expand Down
6 changes: 6 additions & 0 deletions tests/ocsf/compile/test_versus.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,12 @@ def test_versus():
]:
check_prop(change, attr)

if isinstance(change.enum, dict):
for k, value in change.enum.items():
assert isinstance(value, NoChange), f"Expected enum value {k} to be NoChange, got {value}"
else:
assert isinstance(change.enum, NoChange), f"Expected enum to be NoChange, got {change.enum}"

for name in sorted(diff.profiles.keys()):
print("Testing profile:", name)
profile = diff.profiles[name]
Expand Down

0 comments on commit 5fe25e1

Please sign in to comment.