diff --git a/bin/stix-shifter-diag b/bin/stix-shifter-diag index 244ae5f7..3f9e9554 100755 --- a/bin/stix-shifter-diag +++ b/bin/stix-shifter-diag @@ -3,7 +3,7 @@ import argparse import datetime from kestrel_datasource_stixshifter.diagnosis import Diagnosis -from kestrel_datasource_stixshifter.connector import check_module_availability +from kestrel_datasource_stixshifter.connector import setup_connector_module from firepit.timestamp import timefmt @@ -83,7 +83,7 @@ if __name__ == "__main__": diag.diagnose_config() # 2. setup connector and ping - check_module_availability(diag.connector_name, args.ignore_cert) + setup_connector_module(diag.connector_name, diag.allow_dev_connector, args.ignore_cert) # 3. query translation test diag.diagnose_translate_query(patterns[0]) diff --git a/src/kestrel_datasource_stixshifter/config.py b/src/kestrel_datasource_stixshifter/config.py index 7f5b967d..1b4a907f 100644 --- a/src/kestrel_datasource_stixshifter/config.py +++ b/src/kestrel_datasource_stixshifter/config.py @@ -17,6 +17,7 @@ RETRIEVAL_BATCH_SIZE = 2000 SINGLE_BATCH_TIMEOUT = 60 COOL_DOWN_AFTER_TRANSMISSION = 0 +ALLOW_DEV_CONNECTOR = False FAST_TRANSLATE_CONNECTORS = [] # Suggested: ["qradar", "elastic_ecs"] @@ -140,8 +141,9 @@ def get_datasource_from_profiles(profile_name, profiles): if "options" not in connection: connection["options"] = {} - retrieval_batch_size = _extract_integer_param_from_connection_config( + retrieval_batch_size = _extract_param_from_connection_config( "retrieval_batch_size", + int, RETRIEVAL_BATCH_SIZE, connection, profile_name, @@ -149,8 +151,9 @@ def get_datasource_from_profiles(profile_name, profiles): # rename this field for stix-shifter use; x2 the size to ensure retrieval connection["options"]["result_limit"] = retrieval_batch_size * 2 - single_batch_timeout = _extract_integer_param_from_connection_config( + single_batch_timeout = _extract_param_from_connection_config( "single_batch_timeout", + int, SINGLE_BATCH_TIMEOUT, connection, profile_name, @@ -158,19 +161,29 @@ def get_datasource_from_profiles(profile_name, profiles): # rename this field for stix-shifter use connection["options"]["timeout"] = single_batch_timeout - cool_down_after_transmission = _extract_integer_param_from_connection_config( + cool_down_after_transmission = _extract_param_from_connection_config( "cool_down_after_transmission", + int, COOL_DOWN_AFTER_TRANSMISSION, connection, profile_name, ) + allow_dev_connector = _extract_param_from_connection_config( + "allow_dev_connector", + bool, + ALLOW_DEV_CONNECTOR, + connection, + profile_name, + ) + return ( connector_name, connection, configuration, retrieval_batch_size, cool_down_after_transmission, + allow_dev_connector, ) @@ -208,14 +221,14 @@ def load_options(): return config["options"] -def _extract_integer_param_from_connection_config( - param_name, default, connection, profile_name +def _extract_param_from_connection_config( + param_name, processing_func, default, connection, profile_name ): value = default if param_name in connection["options"]: # remove the non-stix-shifter field {param_name} to avoid stix-shifter error try: - value = int(connection["options"].pop(param_name)) + value = processing_func(connection["options"].pop(param_name)) except: raise InvalidDataSource( profile_name, diff --git a/src/kestrel_datasource_stixshifter/connector.py b/src/kestrel_datasource_stixshifter/connector.py index a41dffda..370eecea 100644 --- a/src/kestrel_datasource_stixshifter/connector.py +++ b/src/kestrel_datasource_stixshifter/connector.py @@ -89,40 +89,36 @@ def install_package(connector_name, requests_verify=True): ) -def ensure_version_consistency(connector_name, requests_verify=True): - """Check if the installed connector package has the same version as - stix-shifter If the version is different, uninstall connector - package and the install the same version as stix-shifter - - """ - stixshifter_version = version("stix_shifter") - package_name = get_package_name(connector_name) - package_version = version(package_name) - if package_version == stixshifter_version: - return - package_w_ver = package_name + "==" + package_version - _logger.info( - f"{package_name} version {package_version} is different " - f"from stix-shifter version {stixshifter_version}." - ) - _logger.info(f'uninstalling Python package "{package_w_ver}".') - try: - subprocess.check_call( - [sys.executable, "-m", "pip", "uninstall", "--yes", package_w_ver] - ) - except: - _logger.info(f"failed to uninstall package {package_w_ver}") - install_package(connector_name, requests_verify) - - -def check_module_availability(connector_name, requests_verify=True): +def setup_connector_module( + connector_name, allow_dev_connector=False, requests_verify=True +): try: importlib.import_module( "stix_shifter_modules." + connector_name + ".entry_point" ) - - ensure_version_consistency(connector_name, requests_verify) - except: + connector_available = False + else: + stixshifter_version = version("stix_shifter") + package_name = get_package_name(connector_name) + package_version = version(package_name) + if package_version == stixshifter_version or allow_dev_connector: + connector_available = True + else: + connector_available = False + package_w_ver = package_name + "==" + package_version + _logger.info( + f"{package_name} version {package_version} is different " + f"from stix-shifter version {stixshifter_version}." + ) + _logger.info(f'uninstalling Python package "{package_w_ver}".') + try: + subprocess.check_call( + [sys.executable, "-m", "pip", "uninstall", "--yes", package_w_ver] + ) + except: + _logger.info(f"failed to uninstall package {package_w_ver}") + + if not connector_available: _logger.info(f'miss STIX-shifter connector "{connector_name}"') install_package(connector_name, requests_verify) diff --git a/src/kestrel_datasource_stixshifter/diagnosis.py b/src/kestrel_datasource_stixshifter/diagnosis.py index 7cb5f929..0e81cc00 100644 --- a/src/kestrel_datasource_stixshifter/diagnosis.py +++ b/src/kestrel_datasource_stixshifter/diagnosis.py @@ -25,6 +25,7 @@ def __init__(self, datasource_name): self.configuration_dict, self.retrieval_batch_size, self.cool_down_after_transmission, + self.allow_dev_connector, ) = get_datasource_from_profiles(datasource_name, self.profiles) self.if_fast_translation = ( self.connector_name in self.kestrel_options["fast_translate"] diff --git a/src/kestrel_datasource_stixshifter/interface.py b/src/kestrel_datasource_stixshifter/interface.py index 3ee7ef39..6bb24cfb 100644 --- a/src/kestrel_datasource_stixshifter/interface.py +++ b/src/kestrel_datasource_stixshifter/interface.py @@ -32,6 +32,7 @@ retrieval_batch_size: 10000 # set to 10000 to match default Elasticsearch page size; Kestrel default across connectors: 2000 single_batch_timeout: 120 # increase it if hit 60 seconds (Kestrel default) timeout error for each batch of retrieval cool_down_after_transmission: 2 # seconds to cool down between data source API calls, required by some API such as sentinelone; Kestrel default: 0 + allow_dev_connector: True # do not check version of a connector to allow custom/testing connector installed with any version; Kestrel default: False dialects: # more info: https://github.com/opencybersecurityalliance/stix-shifter/tree/develop/stix_shifter_modules/elastic_ecs#dialects - beats # need it if the index is created by Filebeat/Winlogbeat/*beat config: diff --git a/src/kestrel_datasource_stixshifter/query.py b/src/kestrel_datasource_stixshifter/query.py index f3fa5ea6..fa0d61e5 100644 --- a/src/kestrel_datasource_stixshifter/query.py +++ b/src/kestrel_datasource_stixshifter/query.py @@ -9,7 +9,7 @@ from kestrel.datasource import ReturnFromStore from kestrel.utils import mkdtemp from kestrel.exceptions import DataSourceError, DataSourceManagerInternalError -from kestrel_datasource_stixshifter.connector import check_module_availability +from kestrel_datasource_stixshifter.connector import setup_connector_module from kestrel_datasource_stixshifter import multiproc from kestrel_datasource_stixshifter.config import ( get_datasource_from_profiles, @@ -82,11 +82,12 @@ def query_datasource(uri, pattern, session_id, config, store, limit=None): configuration_dict, retrieval_batch_size, cool_down_after_transmission, + allow_dev_connector, ) = map( copy.deepcopy, get_datasource_from_profiles(profile, config["profiles"]) ) - check_module_availability(connector_name) + setup_connector_module(connector_name, allow_dev_connector) if _logger.isEnabledFor(logging.DEBUG): data_path_striped = "".join(filter(str.isalnum, profile)) diff --git a/tests/test_stixshifter.py b/tests/test_stixshifter.py index d8cb93a5..89b62efa 100644 --- a/tests/test_stixshifter.py +++ b/tests/test_stixshifter.py @@ -1,11 +1,16 @@ import pytest import os +import sys +import subprocess +import importlib +from importlib.metadata import version from kestrel.session import Session from kestrel_datasource_stixshifter.connector import ( verify_package_origin, - check_module_availability, + setup_connector_module, + get_package_name, ) from kestrel_datasource_stixshifter.config import get_datasource_from_profiles @@ -17,10 +22,31 @@ def test_verify_package_origin(): verify_package_origin(connector_name, "test_version") -def test_check_module_availability(): +def test_setup_connector_module(): connectors = ["stix_bundle"] for connector_name in connectors: - check_module_availability(connector_name) + setup_connector_module(connector_name) + importlib.import_module("stix_shifter_modules." + connector_name + ".entry_point") + + +def test_setup_connector_module_w_wrong_version(): + subprocess.check_call([sys.executable, "-m", "pip", "install", "stix-shifter-modules-paloalto==5.0.0"]) + connector_name = "paloalto" + setup_connector_module(connector_name) + importlib.import_module("stix_shifter_modules." + connector_name + ".entry_point") + stixshifter_version = version("stix_shifter") + package_name = get_package_name(connector_name) + package_version = version(package_name) + assert stixshifter_version == package_version + + +def test_setup_connector_module_dev_connector(): + subprocess.check_call([sys.executable, "-m", "pip", "install", "stix-shifter-modules-datadog==5.0.0"]) + connector_name = "datadog" + setup_connector_module(connector_name, True) + importlib.import_module("stix_shifter_modules." + connector_name + ".entry_point") + package_version = version(get_package_name(connector_name)) + assert package_version == "5.0.0" def test_yaml_profiles_refresh(tmp_path): @@ -51,12 +77,13 @@ def test_yaml_profiles_refresh(tmp_path): retrieval_batch_size: 10000 single_batch_timeout: 120 cool_down_after_transmission: 5 + allow_dev_connector: True dialects: - beats config: auth: id: profileB - api_key: asdf + api_key: xxxxxx """ profile_file = tmp_path / "stixshifter.yaml" @@ -79,7 +106,7 @@ def test_yaml_profiles_refresh(tmp_path): ss_config = s.config["datasources"]["kestrel_datasource_stixshifter"] ss_profiles = ss_config["profiles"] - connector_name, connection, configuration, retrieval_batch_size, cool_down_after_transmission = get_datasource_from_profiles("host101", ss_profiles) + connector_name, connection, configuration, retrieval_batch_size, cool_down_after_transmission, allow_dev_connector = get_datasource_from_profiles("host101", ss_profiles) assert connector_name == "elastic_ecs" assert configuration["auth"]["id"] == "profileA" assert configuration["auth"]["api_key"] == "qwer" @@ -95,13 +122,14 @@ def test_yaml_profiles_refresh(tmp_path): # need to refresh the pointers since the dict is updated ss_profiles = ss_config["profiles"] - connector_name, connection, configuration, retrieval_batch_size, cool_down_after_transmission = get_datasource_from_profiles("host101", ss_profiles) + connector_name, connection, configuration, retrieval_batch_size, cool_down_after_transmission, allow_dev_connector = get_datasource_from_profiles("host101", ss_profiles) assert connector_name == "elastic_ecs" assert configuration["auth"]["id"] == "profileB" - assert configuration["auth"]["api_key"] == "asdf" + assert configuration["auth"]["api_key"] == "xxxxxx" assert connection["options"]["timeout"] == 120 assert connection["options"]["result_limit"] == 10000 * 2 assert retrieval_batch_size == 10000 assert cool_down_after_transmission == 5 + assert allow_dev_connector == True del os.environ["KESTREL_STIXSHIFTER_CONFIG"] diff --git a/tests/test_stixshifter_translator.py b/tests/test_stixshifter_translator.py index 94c58d9d..3d36cff0 100644 --- a/tests/test_stixshifter_translator.py +++ b/tests/test_stixshifter_translator.py @@ -4,7 +4,7 @@ import pytest from multiprocessing import Queue -from kestrel_datasource_stixshifter.connector import check_module_availability +from kestrel_datasource_stixshifter.connector import setup_connector_module from kestrel_datasource_stixshifter import multiproc from kestrel_datasource_stixshifter.worker.utils import TransmissionResult from kestrel_datasource_stixshifter.worker import STOP_SIGN @@ -78,7 +78,7 @@ def test_stixshifter_translate(): query_id = "8df266aa-2901-4a94-ace9-a4403e310fa1" - check_module_availability(CONNECTOR_NAME) + setup_connector_module(CONNECTOR_NAME) input_queue = Queue() output_queue = Queue() @@ -108,7 +108,7 @@ def test_stixshifter_translate(): def test_stixshifter_translate_with_bundle_writing_to_disk(tmpdir): query_id = "8df266aa-2901-4a94-ace9-a4403e310fa1" - check_module_availability(CONNECTOR_NAME) + setup_connector_module(CONNECTOR_NAME) cache_bundle_path_prefix = str(tmpdir.join("test")) offset_str = str(SAMPLE_RESULT.offset).zfill(32) cache_bundle_path = cache_bundle_path_prefix + f"_{offset_str}.json" @@ -145,7 +145,7 @@ def test_stixshifter_translate_with_bundle_writing_to_disk(tmpdir): def test_fast_translate(): query_id = "8df266aa-2901-4a94-ace9-a4403e310fa1" - check_module_availability(CONNECTOR_NAME) + setup_connector_module(CONNECTOR_NAME) input_queue = Queue() output_queue = Queue() @@ -174,7 +174,7 @@ def test_fast_translate(): def test_stixshifter_fast_translate_with_parquet_writing_to_disk(tmpdir): query_id = "8df266aa-2901-4a94-ace9-a4403e310fa1" - check_module_availability(CONNECTOR_NAME) + setup_connector_module(CONNECTOR_NAME) cache_parquet_path_prefix = str(tmpdir.join("test")) offset_str = str(SAMPLE_RESULT.offset).zfill(32) cache_parquet_path = cache_parquet_path_prefix + f"_{offset_str}.parquet"