Skip to content

Commit

Permalink
Merge pull request #406 from opencybersecurityalliance/feature-402-ss…
Browse files Browse the repository at this point in the history
…-dev-mode

add custom connector support
  • Loading branch information
subbyte authored Sep 21, 2023
2 parents af397fa + 43041e8 commit c75d4cf
Show file tree
Hide file tree
Showing 8 changed files with 92 additions and 52 deletions.
4 changes: 2 additions & 2 deletions bin/stix-shifter-diag
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import argparse
import datetime
from kestrel_datasource_stixshifter.diagnosis import Diagnosis
from kestrel_datasource_stixshifter.connector import check_module_availability
from kestrel_datasource_stixshifter.connector import setup_connector_module
from firepit.timestamp import timefmt


Expand Down Expand Up @@ -83,7 +83,7 @@ if __name__ == "__main__":
diag.diagnose_config()

# 2. setup connector and ping
check_module_availability(diag.connector_name, args.ignore_cert)
setup_connector_module(diag.connector_name, diag.allow_dev_connector, args.ignore_cert)

# 3. query translation test
diag.diagnose_translate_query(patterns[0])
Expand Down
25 changes: 19 additions & 6 deletions src/kestrel_datasource_stixshifter/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
RETRIEVAL_BATCH_SIZE = 2000
SINGLE_BATCH_TIMEOUT = 60
COOL_DOWN_AFTER_TRANSMISSION = 0
ALLOW_DEV_CONNECTOR = False
FAST_TRANSLATE_CONNECTORS = [] # Suggested: ["qradar", "elastic_ecs"]


Expand Down Expand Up @@ -140,37 +141,49 @@ def get_datasource_from_profiles(profile_name, profiles):
if "options" not in connection:
connection["options"] = {}

retrieval_batch_size = _extract_integer_param_from_connection_config(
retrieval_batch_size = _extract_param_from_connection_config(
"retrieval_batch_size",
int,
RETRIEVAL_BATCH_SIZE,
connection,
profile_name,
)
# rename this field for stix-shifter use; x2 the size to ensure retrieval
connection["options"]["result_limit"] = retrieval_batch_size * 2

single_batch_timeout = _extract_integer_param_from_connection_config(
single_batch_timeout = _extract_param_from_connection_config(
"single_batch_timeout",
int,
SINGLE_BATCH_TIMEOUT,
connection,
profile_name,
)
# rename this field for stix-shifter use
connection["options"]["timeout"] = single_batch_timeout

cool_down_after_transmission = _extract_integer_param_from_connection_config(
cool_down_after_transmission = _extract_param_from_connection_config(
"cool_down_after_transmission",
int,
COOL_DOWN_AFTER_TRANSMISSION,
connection,
profile_name,
)

allow_dev_connector = _extract_param_from_connection_config(
"allow_dev_connector",
bool,
ALLOW_DEV_CONNECTOR,
connection,
profile_name,
)

return (
connector_name,
connection,
configuration,
retrieval_batch_size,
cool_down_after_transmission,
allow_dev_connector,
)


Expand Down Expand Up @@ -208,14 +221,14 @@ def load_options():
return config["options"]


def _extract_integer_param_from_connection_config(
param_name, default, connection, profile_name
def _extract_param_from_connection_config(
param_name, processing_func, default, connection, profile_name
):
value = default
if param_name in connection["options"]:
# remove the non-stix-shifter field {param_name} to avoid stix-shifter error
try:
value = int(connection["options"].pop(param_name))
value = processing_func(connection["options"].pop(param_name))
except:
raise InvalidDataSource(
profile_name,
Expand Down
56 changes: 26 additions & 30 deletions src/kestrel_datasource_stixshifter/connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,40 +89,36 @@ def install_package(connector_name, requests_verify=True):
)


def ensure_version_consistency(connector_name, requests_verify=True):
"""Check if the installed connector package has the same version as
stix-shifter If the version is different, uninstall connector
package and the install the same version as stix-shifter
"""
stixshifter_version = version("stix_shifter")
package_name = get_package_name(connector_name)
package_version = version(package_name)
if package_version == stixshifter_version:
return
package_w_ver = package_name + "==" + package_version
_logger.info(
f"{package_name} version {package_version} is different "
f"from stix-shifter version {stixshifter_version}."
)
_logger.info(f'uninstalling Python package "{package_w_ver}".')
try:
subprocess.check_call(
[sys.executable, "-m", "pip", "uninstall", "--yes", package_w_ver]
)
except:
_logger.info(f"failed to uninstall package {package_w_ver}")
install_package(connector_name, requests_verify)


def check_module_availability(connector_name, requests_verify=True):
def setup_connector_module(
connector_name, allow_dev_connector=False, requests_verify=True
):
try:
importlib.import_module(
"stix_shifter_modules." + connector_name + ".entry_point"
)

ensure_version_consistency(connector_name, requests_verify)

except:
connector_available = False
else:
stixshifter_version = version("stix_shifter")
package_name = get_package_name(connector_name)
package_version = version(package_name)
if package_version == stixshifter_version or allow_dev_connector:
connector_available = True
else:
connector_available = False
package_w_ver = package_name + "==" + package_version
_logger.info(
f"{package_name} version {package_version} is different "
f"from stix-shifter version {stixshifter_version}."
)
_logger.info(f'uninstalling Python package "{package_w_ver}".')
try:
subprocess.check_call(
[sys.executable, "-m", "pip", "uninstall", "--yes", package_w_ver]
)
except:
_logger.info(f"failed to uninstall package {package_w_ver}")

if not connector_available:
_logger.info(f'miss STIX-shifter connector "{connector_name}"')
install_package(connector_name, requests_verify)
1 change: 1 addition & 0 deletions src/kestrel_datasource_stixshifter/diagnosis.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ def __init__(self, datasource_name):
self.configuration_dict,
self.retrieval_batch_size,
self.cool_down_after_transmission,
self.allow_dev_connector,
) = get_datasource_from_profiles(datasource_name, self.profiles)
self.if_fast_translation = (
self.connector_name in self.kestrel_options["fast_translate"]
Expand Down
1 change: 1 addition & 0 deletions src/kestrel_datasource_stixshifter/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
retrieval_batch_size: 10000 # set to 10000 to match default Elasticsearch page size; Kestrel default across connectors: 2000
single_batch_timeout: 120 # increase it if hit 60 seconds (Kestrel default) timeout error for each batch of retrieval
cool_down_after_transmission: 2 # seconds to cool down between data source API calls, required by some API such as sentinelone; Kestrel default: 0
allow_dev_connector: True # do not check version of a connector to allow custom/testing connector installed with any version; Kestrel default: False
dialects: # more info: https://github.com/opencybersecurityalliance/stix-shifter/tree/develop/stix_shifter_modules/elastic_ecs#dialects
- beats # need it if the index is created by Filebeat/Winlogbeat/*beat
config:
Expand Down
5 changes: 3 additions & 2 deletions src/kestrel_datasource_stixshifter/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from kestrel.datasource import ReturnFromStore
from kestrel.utils import mkdtemp
from kestrel.exceptions import DataSourceError, DataSourceManagerInternalError
from kestrel_datasource_stixshifter.connector import check_module_availability
from kestrel_datasource_stixshifter.connector import setup_connector_module
from kestrel_datasource_stixshifter import multiproc
from kestrel_datasource_stixshifter.config import (
get_datasource_from_profiles,
Expand Down Expand Up @@ -82,11 +82,12 @@ def query_datasource(uri, pattern, session_id, config, store, limit=None):
configuration_dict,
retrieval_batch_size,
cool_down_after_transmission,
allow_dev_connector,
) = map(
copy.deepcopy, get_datasource_from_profiles(profile, config["profiles"])
)

check_module_availability(connector_name)
setup_connector_module(connector_name, allow_dev_connector)

if _logger.isEnabledFor(logging.DEBUG):
data_path_striped = "".join(filter(str.isalnum, profile))
Expand Down
42 changes: 35 additions & 7 deletions tests/test_stixshifter.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,16 @@
import pytest
import os
import sys
import subprocess
import importlib
from importlib.metadata import version

from kestrel.session import Session

from kestrel_datasource_stixshifter.connector import (
verify_package_origin,
check_module_availability,
setup_connector_module,
get_package_name,
)

from kestrel_datasource_stixshifter.config import get_datasource_from_profiles
Expand All @@ -17,10 +22,31 @@ def test_verify_package_origin():
verify_package_origin(connector_name, "test_version")


def test_check_module_availability():
def test_setup_connector_module():
connectors = ["stix_bundle"]
for connector_name in connectors:
check_module_availability(connector_name)
setup_connector_module(connector_name)
importlib.import_module("stix_shifter_modules." + connector_name + ".entry_point")


def test_setup_connector_module_w_wrong_version():
subprocess.check_call([sys.executable, "-m", "pip", "install", "stix-shifter-modules-paloalto==5.0.0"])
connector_name = "paloalto"
setup_connector_module(connector_name)
importlib.import_module("stix_shifter_modules." + connector_name + ".entry_point")
stixshifter_version = version("stix_shifter")
package_name = get_package_name(connector_name)
package_version = version(package_name)
assert stixshifter_version == package_version


def test_setup_connector_module_dev_connector():
subprocess.check_call([sys.executable, "-m", "pip", "install", "stix-shifter-modules-datadog==5.0.0"])
connector_name = "datadog"
setup_connector_module(connector_name, True)
importlib.import_module("stix_shifter_modules." + connector_name + ".entry_point")
package_version = version(get_package_name(connector_name))
assert package_version == "5.0.0"


def test_yaml_profiles_refresh(tmp_path):
Expand Down Expand Up @@ -51,12 +77,13 @@ def test_yaml_profiles_refresh(tmp_path):
retrieval_batch_size: 10000
single_batch_timeout: 120
cool_down_after_transmission: 5
allow_dev_connector: True
dialects:
- beats
config:
auth:
id: profileB
api_key: asdf
api_key: xxxxxx
"""

profile_file = tmp_path / "stixshifter.yaml"
Expand All @@ -79,7 +106,7 @@ def test_yaml_profiles_refresh(tmp_path):

ss_config = s.config["datasources"]["kestrel_datasource_stixshifter"]
ss_profiles = ss_config["profiles"]
connector_name, connection, configuration, retrieval_batch_size, cool_down_after_transmission = get_datasource_from_profiles("host101", ss_profiles)
connector_name, connection, configuration, retrieval_batch_size, cool_down_after_transmission, allow_dev_connector = get_datasource_from_profiles("host101", ss_profiles)
assert connector_name == "elastic_ecs"
assert configuration["auth"]["id"] == "profileA"
assert configuration["auth"]["api_key"] == "qwer"
Expand All @@ -95,13 +122,14 @@ def test_yaml_profiles_refresh(tmp_path):

# need to refresh the pointers since the dict is updated
ss_profiles = ss_config["profiles"]
connector_name, connection, configuration, retrieval_batch_size, cool_down_after_transmission = get_datasource_from_profiles("host101", ss_profiles)
connector_name, connection, configuration, retrieval_batch_size, cool_down_after_transmission, allow_dev_connector = get_datasource_from_profiles("host101", ss_profiles)
assert connector_name == "elastic_ecs"
assert configuration["auth"]["id"] == "profileB"
assert configuration["auth"]["api_key"] == "asdf"
assert configuration["auth"]["api_key"] == "xxxxxx"
assert connection["options"]["timeout"] == 120
assert connection["options"]["result_limit"] == 10000 * 2
assert retrieval_batch_size == 10000
assert cool_down_after_transmission == 5
assert allow_dev_connector == True

del os.environ["KESTREL_STIXSHIFTER_CONFIG"]
10 changes: 5 additions & 5 deletions tests/test_stixshifter_translator.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import pytest
from multiprocessing import Queue

from kestrel_datasource_stixshifter.connector import check_module_availability
from kestrel_datasource_stixshifter.connector import setup_connector_module
from kestrel_datasource_stixshifter import multiproc
from kestrel_datasource_stixshifter.worker.utils import TransmissionResult
from kestrel_datasource_stixshifter.worker import STOP_SIGN
Expand Down Expand Up @@ -78,7 +78,7 @@

def test_stixshifter_translate():
query_id = "8df266aa-2901-4a94-ace9-a4403e310fa1"
check_module_availability(CONNECTOR_NAME)
setup_connector_module(CONNECTOR_NAME)

input_queue = Queue()
output_queue = Queue()
Expand Down Expand Up @@ -108,7 +108,7 @@ def test_stixshifter_translate():

def test_stixshifter_translate_with_bundle_writing_to_disk(tmpdir):
query_id = "8df266aa-2901-4a94-ace9-a4403e310fa1"
check_module_availability(CONNECTOR_NAME)
setup_connector_module(CONNECTOR_NAME)
cache_bundle_path_prefix = str(tmpdir.join("test"))
offset_str = str(SAMPLE_RESULT.offset).zfill(32)
cache_bundle_path = cache_bundle_path_prefix + f"_{offset_str}.json"
Expand Down Expand Up @@ -145,7 +145,7 @@ def test_stixshifter_translate_with_bundle_writing_to_disk(tmpdir):

def test_fast_translate():
query_id = "8df266aa-2901-4a94-ace9-a4403e310fa1"
check_module_availability(CONNECTOR_NAME)
setup_connector_module(CONNECTOR_NAME)

input_queue = Queue()
output_queue = Queue()
Expand Down Expand Up @@ -174,7 +174,7 @@ def test_fast_translate():

def test_stixshifter_fast_translate_with_parquet_writing_to_disk(tmpdir):
query_id = "8df266aa-2901-4a94-ace9-a4403e310fa1"
check_module_availability(CONNECTOR_NAME)
setup_connector_module(CONNECTOR_NAME)
cache_parquet_path_prefix = str(tmpdir.join("test"))
offset_str = str(SAMPLE_RESULT.offset).zfill(32)
cache_parquet_path = cache_parquet_path_prefix + f"_{offset_str}.parquet"
Expand Down

0 comments on commit c75d4cf

Please sign in to comment.