Skip to content

Commit

Permalink
Merge pull request #2 from AttackIQ/feature/openai_llm
Browse files Browse the repository at this point in the history
Feature/openai llm
  • Loading branch information
slincoln-aiq authored Nov 17, 2023
2 parents d404bc5 + cd0e653 commit 9bb7c64
Show file tree
Hide file tree
Showing 24 changed files with 1,183 additions and 30 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -160,3 +160,5 @@ cython_debug/
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
.idea/

# Custom data folder for vectordb and sigma rules
sigmaiq/llm/data/*
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@ encountered. Please report any issues [here](https://github.com/AttackIQ/SigmAIQ
Feature requests are also always welcome! pySigma tools/utils are currently not in the pre-release version,
and will be added in future releases.

# LLM Support
For LLM usage, see the [LLM README](sigmaiq/llm/README.md)

# Installation & Usage

## Installation
Expand Down
Empty file added examples/__init__.py
Empty file.
49 changes: 49 additions & 0 deletions examples/custom_field_mappings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# %% This example shows how to use the SigmAIQ pySigma wrapper to provide custom field mappings for a backend.
# %% This allows you to translate specific field names to custom field names during rule translation.

# %% Import SigmAIQ
from sigmaiq import SigmAIQBackend, SigmAIQPipeline

# %% Import pprint for pretty printing, and copy for copying rules
from pprint import pprint
from copy import copy

# %% A basic Sigma Rule in YAML str to convert to a query.
# %% SigmAIQ also accepts a rule in JSON/Dict format, SigmaRule objects, and SigmaCollection objects
# NOTE: the nesting inside the string is significant YAML; `product`/`category` must sit under `logsource`
# and `selection1`/`condition` under `detection`, otherwise those two keys parse as null.

sigma_rule = """
title: whoami Command
description: Detects a basic whoami commandline execution
logsource:
    product: windows
    category: process_creation
detection:
    selection1:
        - CommandLine|contains: 'whoami.exe'
    condition: selection1
"""

# %% Create a SigmAIQ backend and translate the rule to a Splunk query (no custom field mappings yet)
sigmaiq_backend = SigmAIQBackend(backend="splunk").create_backend()
query = sigmaiq_backend.translate(copy(sigma_rule))  # Returns List of queries

# The label names the backend actually selected above ("splunk").
print("\nSplunk Query: ", end="\n\n")
pprint(query[0])
print("\n-------------------")

# %% Create custom field mappings
# %% This will map the CommandLine field to a custom field named "CustomCommandLine"
custom_field_mappings = {"CommandLine": "CustomCommandLine"}
my_custom_pipeline = SigmAIQPipeline.from_fieldmap(custom_field_mappings, priority=0).create_pipeline()

# %% Create a SigmAIQ backend and translate the rule to a Splunk query with our custom field mappings
sigmaiq_backend = SigmAIQBackend(
    backend="splunk",
    processing_pipeline=my_custom_pipeline,
).create_backend()

query = sigmaiq_backend.translate(copy(sigma_rule))  # Returns List of queries

print("\nSplunk Query with Custom Fieldmappings: ", end="\n\n")
pprint(query[0])
print("\n-------------------")

36 changes: 36 additions & 0 deletions examples/llm_basic_usage.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# %% This example will demonstrate how to use SigmAIQ to perform the following:
# %% 1. Download the latest Sigma Rule package release
# %% 2. Create embeddings of the Sigma Rules in the package
# %% 3. Create and save a VectorDB of the Sigma Rule embeddings
# %% 4. Use a similarity search on the VectorDB to find Sigma Rules similar to a provided query
from pprint import pprint

# %% NOTE: this example uses OpenAI for embeddings. Ensure you have an OpenAI API key set in the environment variable
# %% OPENAI_API_KEY

# %% Also ensure you have installed the correct requirements with:
# `pip install -r requirements/common.txt -r requirements/llm.txt`


# %% Import the SigmAIQ LLM base class
from sigmaiq.llm.base import SigmaLLM

# %% Create a SigmaLLM object with default settings. See the class docstring for more information
sigma_llm = SigmaLLM()

# %% The `create_sigma_vectordb()` method will automatically do all the work for you :) (only run this once)
sigma_llm.create_sigma_vectordb(save=True)  # Save locally to disk

# %% Run a similarity search on the vectordb for encoded powershell commands and print top 3 results
query = "Encoded powershell commands"
matching_rules = sigma_llm.simple_search(query, k=3)
for matching_rule in matching_rules:
    # Each result exposes the matched rule text as `page_content`.
    print(matching_rule.page_content, end="\n\n-------------------\n\n")

# %% You can also load an existing vector store from disk (recommended)
sigma_llm.load_sigma_vectordb()

query = "certutil"
matching_rules = sigma_llm.simple_search(query, k=3)
for matching_rule in matching_rules:
    print(matching_rule.page_content, end="\n\n-------------------\n\n")
58 changes: 58 additions & 0 deletions examples/llm_rule_translation_and_creation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# %% This example will demonstrate how to create a Sigma langchain agent chatbot, which can perform various tasks like
# %% automatically translating a rule for you, and creating new rules from a user's input.

# %% Import required SigmAIQ classes and functions
from sigmaiq.llm.toolkits.base import create_sigma_agent
from sigmaiq.llm.base import SigmaLLM

# %% Ensure we have our Sigma vector store setup with our base LLM class
sigma_llm = SigmaLLM()

try:
    sigma_llm.load_sigma_vectordb()
except Exception as e:
    # Deliberately broad for this example: any failure to load an existing store
    # is reported and followed by building (and saving) a fresh one.
    print(e)
    print("Creating new Sigma VectorDB")
    sigma_llm.create_sigma_vectordb(save=True)

# %% Create a Sigma Agent Executor, and pass it our Sigma VectorDB
sigma_agent_executor = create_sigma_agent(sigma_vectorstore=sigma_llm.sigmadb)

# %% RULE TRANSLATION
# %% Have the agent automatically translate a Sigma rule to a Splunk query with the splunk_cim_dm pipeline
# NOTE: the nesting inside the string is significant YAML; `product`/`category` must sit under `logsource`
# and `selection1`/`condition` under `detection`.

sigma_rule = r"""
title: whoami Command
description: Detects a basic whoami commandline execution
logsource:
    product: windows
    category: process_creation
detection:
    selection1:
        - CommandLine|contains: 'whoami.exe'
    condition: selection1
"""

user_input = ("Translate the following Sigma rule to a Splunk query using the 'splunk_cim_dm' pipeline: \n\n" +
              sigma_rule)

# The translation request is actually sent to the agent; previously the prompt was
# built and then overwritten by the rule-creation prompt without ever being used.
answer = sigma_agent_executor.invoke({"input": user_input})
print("\nRULE TRANSLATION:", end="\n\n")
print(f"Question:\n {user_input}", end="\n\n")
print("Answer: \n")
print(answer.get('output'), end="\n\n")

# %% RULE CREATION
# %% The agent will take the user input, look up similar Sigma Rules in the Sigma vector store, then create a brand
# %% new rule based on the context of the user's input and the similar Sigma Rules.

user_input = ("Create a Windows process creation Sigma Rule for certutil downloading a file "
              "from definitely-not-malware.com, then translate it to a Microsoft 365 Defender query.")

answer = sigma_agent_executor.invoke({"input": user_input})
print("\nRULE CREATION:", end="\n\n")
print(f"Question:\n {user_input}", end="\n\n")
print("Answer: \n")
print(answer.get('output'), end="\n\n")


77 changes: 77 additions & 0 deletions examples/translate_sigma_rules.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
# %% This example shows how to use the SigmAIQ pySigma wrapper to easily translate Sigma rules to queries,
# %% without having to worry about installing and configuring the correct backends, pipelines and other details.


# %% Import SigmAIQ
from sigmaiq import SigmAIQBackend

# %% Import pprint for pretty printing, and copy for copying rules
from pprint import pprint
from copy import copy

# %% A basic Sigma Rule in YAML str to convert to a query.
# %% SigmAIQ also accepts a rule in JSON/Dict format, SigmaRule objects, and SigmaCollection objects
# NOTE: the nesting inside the string is significant YAML; `product`/`category` must sit under `logsource`
# and `selection1`/`condition` under `detection`, otherwise those two keys parse as null.

sigma_rule = """
title: whoami Command
description: Detects a basic whoami commandline execution
logsource:
    product: windows
    category: process_creation
detection:
    selection1:
        - CommandLine|contains: 'whoami.exe'
    condition: selection1
"""

# %% BACKENDS
# %% Show the available supported backends
print("Supported Backends:", end="\n\n")
pprint(SigmAIQBackend.display_available_backends())
print("\n-------------------")

# %% Create a SigmAIQ backend and translate the rule to a Microsoft 365 Defender query
# %% SigmAIQ will automatically select the best pipeline for the backend
sigmaiq_backend = SigmAIQBackend(backend="microsoft365defender").create_backend()
query = sigmaiq_backend.translate(copy(sigma_rule))  # Returns List of queries

print("\nM365Defender Query: ", end="\n\n")
pprint(query[0])
print("\n-------------------")

# %% PIPELINES
# %% Show the available pipelines with each backend
print("Available Pipelines:", end="\n\n")
pprint(SigmAIQBackend.display_all_associated_pipelines())
print("\n-------------------")

# %% Create a SigmAIQ backend and translate the rule to a Splunk search with the CIM pipeline
sigmaiq_backend = SigmAIQBackend(backend="splunk", processing_pipeline="splunk_cim_dm").create_backend()
query = sigmaiq_backend.translate(copy(sigma_rule))

print("\nSplunk CIM Query: ", end="\n\n")
pprint(query[0])
print("\n-------------------")

# %% OUTPUT FORMATS
# %% Show the available output formats with each backend
print("\nAvailable Output Formats:", end="\n\n")
pprint(SigmAIQBackend.display_backends_and_outputs())
print("\n-------------------")

# %% Change the output_format to an Enterprise Security Correlation Search stanza
# This reuses the Splunk CIM backend created above; only its output format changes.
sigmaiq_backend.set_output_format("stanza")
query = sigmaiq_backend.translate(copy(sigma_rule))

print("\nSplunk CIM Query, Stanza Output: ", end="\n\n")
pprint(query[0])
print("\n-------------------")


# %% You can also translate a Sigma rule to all supported backend, pipeline, and output format combinations at once.
# %% Any combination that is not supported will not be included in the results.
# %% This is useful for testing and comparing the output of different backends and pipelines.
queries = SigmAIQBackend.create_all_and_translate(copy(sigma_rule))

print("\n All Translations: ", end="\n\n")
pprint(queries)
14 changes: 9 additions & 5 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "SigmAIQ"
version = "0.2.4"
version = "0.3.0"
description = "Wrapper and tools for pySigma and Sigma rules"
authors = ["Stephen Lincoln <stephen.lincoln@attackiq.com>", "AttackIQ <support@attackiq.com>"]
readme = "README.md"
Expand All @@ -20,7 +20,7 @@ packages = [
]

[tool.poetry.dependencies]
python = "^3.8"
python = ">=3.8.1, <=3.11.6"
pysigma = "0.9.11"
certifi = "^2023.07.22"
pysigma-backend-carbonblack = "0.1.4"
Expand All @@ -39,16 +39,20 @@ pysigma-pipeline-sysmon = "1.0.2"
pysigma-pipeline-windows = "1.1.0"
importlib-resources = "^5.13.0"




[tool.poetry.dev-dependencies]
pytest = "^7.4.0"
pytest-cov = "^4.1.0"
black = "^23.7.0"
ruff = "^0.0.286"

[tool.poetry.group.llm]
optional = true

[tool.poetry.group.llm.dependencies]
langchain = "^0.0.335"
openai = "^1.2.4"
tiktoken = "^0.5.1"
faiss-cpu = "^1.7.4"

[build-system]
requires = ["poetry-core"]
Expand Down
26 changes: 1 addition & 25 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,25 +1 @@
certifi==2023.7.22 ; python_version >= "3.8" and python_version < "4.0"
charset-normalizer==3.2.0 ; python_version >= "3.8" and python_version < "4.0"
idna==3.4 ; python_version >= "3.8" and python_version < "4.0"
importlib-resources==5.13.0 ; python_version >= "3.8" and python_version < "4.0"
packaging==22.0 ; python_version >= "3.8" and python_version < "4.0"
pyparsing==3.1.1 ; python_version >= "3.8" and python_version < "4.0"
pysigma-backend-carbonblack==0.1.4 ; python_version >= "3.8" and python_version < "4.0"
pysigma-backend-cortexxdr==0.1.1 ; python_version >= "3.8" and python_version < "4.0"
pysigma-backend-elasticsearch==1.0.5 ; python_version >= "3.8" and python_version < "4.0"
pysigma-backend-insightidr==0.2.1 ; python_version >= "3.8" and python_version < "4.0"
pysigma-backend-loki==0.9.1 ; python_version >= "3.8" and python_version < "4.0"
pysigma-backend-microsoft365defender==0.2.1 ; python_version >= "3.8" and python_version < "4.0"
pysigma-backend-opensearch==1.0.0 ; python_version >= "3.8" and python_version < "4.0"
pysigma-backend-qradar-aql==0.2.3 ; python_version >= "3.8" and python_version < "4.0"
pysigma-backend-sentinelone==0.1.2 ; python_version >= "3.8" and python_version < "4.0"
pysigma-backend-splunk==1.0.2 ; python_version >= "3.8" and python_version < "4.0"
pysigma-backend-stix==0.1.8 ; python_version >= "3.8" and python_version < "4.0"
pysigma-pipeline-crowdstrike==1.0.0 ; python_version >= "3.8" and python_version < "4.0"
pysigma-pipeline-sysmon==1.0.2 ; python_version >= "3.8" and python_version < "4.0"
pysigma-pipeline-windows==1.1.0 ; python_version >= "3.8" and python_version < "4.0"
pysigma==0.9.11 ; python_version >= "3.8" and python_version < "4.0"
pyyaml==6.0.1 ; python_version >= "3.8" and python_version < "4.0"
requests==2.31.0 ; python_version >= "3.8" and python_version < "4.0"
urllib3==2.0.4 ; python_version >= "3.8" and python_version < "4.0"
zipp==3.16.2 ; python_version >= "3.8" and python_version < "3.10"
-r requirements/common.txt
25 changes: 25 additions & 0 deletions requirements/common.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
certifi==2023.7.22 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
charset-normalizer==3.3.2 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
idna==3.4 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
importlib-resources==5.13.0 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
packaging==22.0 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
pyparsing==3.1.1 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
pysigma-backend-carbonblack==0.1.4 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
pysigma-backend-cortexxdr==0.1.1 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
pysigma-backend-elasticsearch==1.0.5 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
pysigma-backend-insightidr==0.2.1 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
pysigma-backend-loki==0.9.1 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
pysigma-backend-microsoft365defender==0.2.1 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
pysigma-backend-opensearch==1.0.0 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
pysigma-backend-qradar-aql==0.2.3 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
pysigma-backend-sentinelone==0.1.2 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
pysigma-backend-splunk==1.0.2 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
pysigma-backend-stix==0.1.8 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
pysigma-pipeline-crowdstrike==1.0.0 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
pysigma-pipeline-sysmon==1.0.2 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
pysigma-pipeline-windows==1.1.0 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
pysigma==0.9.11 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
pyyaml==6.0.1 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
requests==2.31.0 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
urllib3==2.1.0 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
zipp==3.17.0 ; python_full_version >= "3.8.1" and python_version < "3.10"
60 changes: 60 additions & 0 deletions requirements/llm.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
aiohttp==3.8.6 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
aiosignal==1.3.1 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
annotated-types==0.6.0 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
anyio==3.7.1 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
async-timeout==4.0.3 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
attrs==23.1.0 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
backoff==2.2.1 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
beautifulsoup4==4.12.2 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
certifi==2023.7.22 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
chardet==5.2.0 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
charset-normalizer==3.3.2 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
click==8.1.7 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
colorama==0.4.6 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6" and platform_system == "Windows"
dataclasses-json==0.6.2 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
distro==1.8.0 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
emoji==2.8.0 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
exceptiongroup==1.1.3 ; python_full_version >= "3.8.1" and python_version < "3.11"
faiss-cpu==1.7.4 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
filetype==1.2.0 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
frozenlist==1.4.0 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
greenlet==3.0.1 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6" and (platform_machine == "win32" or platform_machine == "WIN32" or platform_machine == "AMD64" or platform_machine == "amd64" or platform_machine == "x86_64" or platform_machine == "ppc64le" or platform_machine == "aarch64")
h11==0.14.0 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
httpcore==1.0.2 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
httpx==0.25.1 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
idna==3.4 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
joblib==1.3.2 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
jsonpatch==1.33 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
jsonpointer==2.4 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
langchain==0.0.335 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
langdetect==1.0.9 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
langsmith==0.0.64 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
lxml==4.9.3 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
marshmallow==3.20.1 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
multidict==6.0.4 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
mypy-extensions==1.0.0 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
nltk==3.8.1 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
numpy==1.24.4 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
openai==1.2.4 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
packaging==22.0 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
pydantic-core==2.14.1 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
pydantic==2.5.0 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
python-iso639==2023.6.15 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
python-magic==0.4.27 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
pyyaml==6.0.1 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
rapidfuzz==3.5.2 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
regex==2023.10.3 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
requests==2.31.0 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
six==1.16.0 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
sniffio==1.3.0 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
soupsieve==2.5 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
sqlalchemy==2.0.23 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
tabulate==0.9.0 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
tenacity==8.2.3 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
tiktoken==0.5.1 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
tqdm==4.66.1 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
typing-extensions==4.8.0 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
typing-inspect==0.9.0 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
unstructured==0.10.30 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
urllib3==2.1.0 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
yarl==1.9.2 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
Loading

0 comments on commit 9bb7c64

Please sign in to comment.