diff --git a/.github/workflows/scripts/ansible-runtime.yaml b/.github/workflows/scripts/ansible-runtime.yaml new file mode 100644 index 0000000000..0d70101544 --- /dev/null +++ b/.github/workflows/scripts/ansible-runtime.yaml @@ -0,0 +1,225 @@ +# SPDX-FileCopyrightText: 2021 Birger Schacht +# +# SPDX-License-Identifier: CC0-1.0 +cymru-whois-expert: + bot_id: cymru-whois-expert + description: Cymru Whois (IP to ASN) is the bot responsible to add network information + to the events (BGP, ASN, AS Name, Country, etc..). + enabled: true + group: Expert + groupname: experts + module: intelmq.bots.experts.cymru_whois.expert + name: Cymru Whois + parameters: + destination_queues: + _default: [file-output-queue] + overwrite: true + redis_cache_db: 5 + redis_cache_host: 127.0.0.1 + redis_cache_password: null + redis_cache_port: 6379 + redis_cache_ttl: 86400 + run_mode: continuous +deduplicator-expert: + bot_id: deduplicator-expert + description: Deduplicator is the bot responsible for detection and removal of duplicate + messages. Messages get cached for seconds. If found in the cache, + it is assumed to be a duplicate. + enabled: true + group: Expert + groupname: experts + module: intelmq.bots.experts.deduplicator.expert + name: Deduplicator + parameters: + destination_queues: + _default: [taxonomy-expert-queue] + filter_keys: raw,time.observation + filter_type: blacklist + redis_cache_db: 6 + redis_cache_host: 127.0.0.1 + redis_cache_port: 6379 + redis_cache_ttl: 86400 + run_mode: continuous +feodo-tracker-browse-collector: + description: Generic URL Fetcher is the bot responsible to get the report from an + URL. 
+ enabled: true + group: Collector + module: intelmq.bots.collectors.http.collector_http + name: URL Fetcher + parameters: + destination_queues: + _default: [feodo-tracker-browse-parser-queue] + extract_files: false + http_password: null + http_url: https://feodotracker.abuse.ch/browse + http_url_formatting: false + http_username: null + name: Feodo Tracker Browse + provider: Abuse.ch + rate_limit: 86400 + ssl_client_certificate: null + run_mode: continuous +feodo-tracker-browse-parser: + description: HTML Table Parser is a bot configurable to parse different html table + data. + enabled: true + group: Parser + module: intelmq.bots.parsers.html_table.parser + name: HTML Table + parameters: + attribute_name: '' + attribute_value: '' + columns: time.source,source.ip,malware.name,status,extra.SBL,source.as_name,source.geolocation.cc + default_url_protocol: http:// + destination_queues: + _default: [deduplicator-expert-queue] + ignore_values: ',,,,Not listed,,' + skip_table_head: true + split_column: '' + split_index: 0 + split_separator: '' + table_index: 0 + time_format: null + type: c2server + run_mode: continuous +file-input: + bot_id: file-input + description: File Collector is the bot responsible to collect reports from local files. + enabled: true + group: Collector + module: intelmq.bots.collectors.file.collector_file + name: File Input + parameters: + delete_file: false + destination_queues: + _default: [file-output-queue] + path: /assets + postfix: .txt + run_mode: scheduled +file-output: + bot_id: file-output + description: File is the bot responsible to send events to a file. + enabled: true + group: Output + groupname: outputs + module: intelmq.bots.outputs.file.output + name: File + parameters: {file: /var/lib/intelmq/bots/file-output/events.txt, hierarchical_output: false, + single_key: null} + run_mode: continuous +gethostbyname-1-expert: + bot_id: gethostbyname-1-expert + description: fqdn2ip is the bot responsible to parsing the ip from the fqdn. 
+ enabled: true + group: Expert + groupname: experts + module: intelmq.bots.experts.gethostbyname.expert + name: Gethostbyname + parameters: + destination_queues: + _default: [cymru-whois-expert-queue] + run_mode: continuous +gethostbyname-2-expert: + bot_id: gethostbyname-2-expert + description: fqdn2ip is the bot responsible to parsing the ip from the fqdn. + enabled: true + group: Expert + groupname: experts + module: intelmq.bots.experts.gethostbyname.expert + name: Gethostbyname + parameters: + destination_queues: + _default: [cymru-whois-expert-queue] + run_mode: continuous +malc0de-parser: + bot_id: malc0de-parser + description: Malc0de Parser is the bot responsible to parse the IP Blacklist and + either Windows Format or Bind Format reports and sanitize the information. + enabled: true + group: Parser + groupname: parsers + module: intelmq.bots.parsers.malc0de.parser + name: Malc0de + parameters: + destination_queues: + _default: [deduplicator-expert-queue] + run_mode: continuous +malc0de-windows-format-collector: + bot_id: malc0de-windows-format-collector + description: '' + enabled: true + group: Collector + groupname: collectors + module: intelmq.bots.collectors.http.collector_http + name: Malc0de Windows Format + parameters: + destination_queues: + _default: [malc0de-parser-queue] + http_password: null + http_url: https://malc0de.com/bl/BOOT + http_username: null + name: Windows Format + provider: Malc0de + rate_limit: 10800 + ssl_client_certificate: null + run_mode: continuous +spamhaus-drop-collector: + bot_id: spamhaus-drop-collector + description: '' + enabled: true + group: Collector + groupname: collectors + module: intelmq.bots.collectors.http.collector_http + name: Spamhaus Drop + parameters: + destination_queues: + _default: [spamhaus-drop-parser-queue] + http_password: null + http_url: https://www.spamhaus.org/drop/drop.txt + http_username: null + name: Drop + provider: Spamhaus + rate_limit: 3600 + ssl_client_certificate: null + run_mode: 
continuous +spamhaus-drop-parser: + bot_id: spamhaus-drop-parser + description: Spamhaus Drop Parser is the bot responsible to parse the DROP, EDROP, + DROPv6, and ASN-DROP reports and sanitize the information. + enabled: true + group: Parser + groupname: parsers + module: intelmq.bots.parsers.spamhaus.parser_drop + name: Spamhaus Drop + parameters: + destination_queues: + _default: [deduplicator-expert-queue] + run_mode: continuous +taxonomy-expert: + bot_id: taxonomy-expert + description: Taxonomy is the bot responsible to apply the eCSIRT Taxonomy to all + events. + enabled: true + group: Expert + groupname: experts + module: intelmq.bots.experts.taxonomy.expert + name: Taxonomy + parameters: + destination_queues: + _default: [url2fqdn-expert-queue] + run_mode: continuous +url2fqdn-expert: + bot_id: url2fqdn-expert + description: url2fqdn is the bot responsible to parsing the fqdn from the url. + enabled: true + group: Expert + groupname: experts + module: intelmq.bots.experts.url2fqdn.expert + name: URL2FQDN + parameters: + destination_queues: + _default: [gethostbyname-1-expert-queue, gethostbyname-2-expert-queue] + load_balance: true + overwrite: false + run_mode: continuous diff --git a/CHANGELOG.md b/CHANGELOG.md index cfc646c69e..2f376c65c8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -390,7 +390,8 @@ Update allowed classification fields to version 1.3 (2021-05-18) (by Sebastian W - Add support for new field `SourceIpInfo.SourceIpv4Int` (PR#1940 by Sebastian Wagner). - Fix mapping of "ConnectionType" fields, this is not `protocol.application`. Now mapped to `extra.*.connection_type` (PR#1940 by Sebastian Wagner). 
- `intelmq.bots.parsers.shadowserver._config`: + - Add support for the new feeds *Honeypot-Amplification-DDoS-Events*, *Honeypot-Brute-Force-Events*, *Honeypot-Darknet*, *IP-Spoofer-Events*, *Sinkhole-Events*, *Sinkhole-HTTP-Events*, *Vulnerable-Exchange-Server*, *Sinkhole-Events-HTTP-Referer* (PR#1950, PR#1952, PR#1953, PR#1954, PR#1970 by Birger Schacht and Sebastian Wagner, PR#1971 by Mikk Margus Möll).
#### Experts - `intelmq.bots.experts.splunk_saved_search.expert`: diff --git a/NEWS.md b/NEWS.md index 0b4482aeb1..d14cdbbed4 100644 --- a/NEWS.md +++ b/NEWS.md @@ -284,7 +284,7 @@ CentOS 7 (with EPEL) provides both Python 3.4 and Python 3.6. If IntelMQ was ins type and reloads them afterwards. Removes any external dependencies (such as curl or wget). This is a replacement for shell scripts such as `update-tor-nodes`, `update-asn-data`, `update-geoip-data`, `update-rfiprisk-data`. - + Usage: ``` intelmq.bots.experts.asn_lookup.expert --update-database diff --git a/debian/control b/debian/control index 6ba1f737d7..227fcacf0f 100644 --- a/debian/control +++ b/debian/control @@ -20,6 +20,7 @@ Build-Depends: debhelper (>= 4.1.16), python3-sphinx-rtd-theme, python3-termstyle, python3-tz, + python3-msgpack, quilt, rsync, safe-rm @@ -41,6 +42,7 @@ Depends: bash-completion, python3-ruamel.yaml, python3-termstyle (>= 0.1.10), python3-tz, + python3-msgpack, redis-server, systemd, ${misc:Depends}, diff --git a/docs/user/bots.rst b/docs/user/bots.rst index fbad6c971a..6015dc6eb6 100644 --- a/docs/user/bots.rst +++ b/docs/user/bots.rst @@ -502,6 +502,9 @@ Requires the `kafka python library `_. * `ssl_ca_certificate`: Optional string of path to trusted CA certificate. Only used by some bots. +.. _intelmq.bots.collectors.misp.collector: + + .. 
_intelmq.bots.collectors.misp.collector: MISP Generic @@ -641,6 +644,9 @@ Requires the rsync executable * `temp_directory`: The temporary directory for rsync to use for rsync'd files. Optional. Default: `$VAR_STATE_PATH/rsync_collector`. `$VAR_STATE_PATH` is `/var/run/intelmq/` or `/opt/intelmq/var/run/`. +.. _intelmq.bots.collectors.shadowserver.collector_reports_api: + + .. _intelmq.bots.collectors.shadowserver.collector_reports_api: Shadowserver Reports API @@ -1700,6 +1706,7 @@ It is required to look up the correct configuration. Look at the documentation in the bot's ``_config.py`` file for more information. +.. _intelmq.bots.parsers.shodan.parser: .. _intelmq.bots.parsers.shodan.parser: @@ -1788,7 +1795,6 @@ Aggregate **Configuration Parameters** * **Cache parameters** (see in section :ref:`common-parameters`) - * TTL is not used, using it would result in data loss. * **fields** Given fields which are used to aggregate like `classification.type, classification.identifier` * **threshold** If the aggregated event is lower than the given threshold after the timespan, the event will get dropped. @@ -1856,6 +1862,8 @@ Use this command to create/update the database and reload the bot: The database is fetched from `routeviews.org `_ and licensed under the Creative Commons Attribution 4.0 International license (see the `routeviews FAQ `_). +.. _intelmq.bots.experts.csv_converter.expert: + .. _intelmq.bots.experts.csv_converter.expert: CSV Converter @@ -1924,6 +1932,8 @@ RemoveAffix Remove part of string from string, example: `www.` from domains. +.. _intelmq.bots.experts.domain_suffix.expert: + .. _intelmq.bots.experts.domain_suffix.expert: Domain Suffix @@ -2696,6 +2706,8 @@ RDAP } +.. _intelmq.bots.experts.recordedfuture_iprisk.expert: + .. 
_intelmq.bots.experts.recordedfuture_iprisk.expert: RecordedFuture IP risk @@ -2986,6 +2998,23 @@ The following operators may be used to match events: * Boolean values can be matched with `==` or `!=` followed by `true` or `false`. Example: ``if extra.has_known_vulns == true { ... }`` + * `:equals` tests for equality between lists, including order. Example for checking a hostname-port pair: + ``if extra.host_tuple :equals ['dns.google', 53] { ... }`` + * `:setequals` tests for set-based equality (ignoring duplicates and value order) between a list of given values. Example for checking for the first nameserver of two domains, regardless of the order they are given in the list: + ``if extra.hostnames :setequals ['ns1.example.com', 'ns1.example.mx'] { ... }`` + + * `:overlaps` tests if there is at least one element in common between the list specified by a key and a list of values. Example for checking if at least one of the ICS, database or vulnerable tags is given: + ``if extra.tags :overlaps ['ics', 'database', 'vulnerable'] { ... } `` + + * `:subsetof` tests if the list of values from the given key only contains values from a set of values specified as the argument. Example for checking for a host that has only ns1.example.com and/or ns2.[...] as its apparent hostname: + ``if extra.hostnames :subsetof ['ns1.example.com', 'ns2.example.com'] { ... }`` + + * `:supersetof` tests if the list of values from the given key is a superset of the values specified as the argument. Example for matching hosts with at least the IoT and vulnerable tags: + ``if extra.tags :supersetof ['iot', 'vulnerable'] { ... }`` + + * Boolean values can be matched with `==` or `!=` followed by `true` or `false`. Example: + ``if extra.has_known_vulns == true { ... }`` + * The combination of multiple expressions can be done using parenthesis and boolean operators: ``if (source.ip == '127.0.0.1') && (comment == 'add field' || classification.taxonomy == 'vulnerable') { ... 
}`` @@ -3396,7 +3425,6 @@ Events without `source.url`, `source.fqdn`, `source.ip`, or `source.asn`, are ig only contains the domain. uWhoisd will automatically strip the subdomain part if it is present in the request. Example: `https://www.theguardian.co.uk` - * TLD: `co.uk` (uWhoisd uses the `Mozilla public suffix list `_ as a reference) * Domain: `theguardian.co.uk` * Subdomain: `www` @@ -4099,6 +4127,8 @@ Then, set the `database` parameter to the `your-db.db` file path. .. _intelmq.bots.outputs.stomp.output: +.. _intelmq.bots.outputs.stomp.output: + STOMP ^^^^^ diff --git a/docs/user/eventdb.rst b/docs/user/eventdb.rst index 971094e6c7..6605630d7e 100644 --- a/docs/user/eventdb.rst +++ b/docs/user/eventdb.rst @@ -25,6 +25,21 @@ Having an `events` table as outlined in the SQL file, IntelMQ's :ref:`intelmq.bo This events table is the core of the so-called EventDB and also required by all other sections of this document. +----------------------- +The events table itself +----------------------- + +IntelMQ comes with the ``intelmq_psql_initdb`` command line tool. It creates an SQL file containing: + +- A ``CREATE TABLE events`` statement with all valid IntelMQ fields as columns and correct types +- Several indexes as examples for a good read & search performance + +All elements of this SQL file can be adapted and extended before running the SQL file against a database, especially the indexes. + +Having an `events` table as outlined in the SQL file, IntelMQ's PostgreSQL Output bot can write all received events into this database table. + +This events table is the core of the so-called EventDB and also required by all other sections of this document. 
+ ----------------- EventDB Utilities ----------------- diff --git a/intelmq/bin/intelmqdump.py b/intelmq/bin/intelmqdump.py index 0f860d60c9..0d6d047fb1 100644 --- a/intelmq/bin/intelmqdump.py +++ b/intelmq/bin/intelmqdump.py @@ -349,7 +349,7 @@ def main(): if queue_name in pipeline_pipes: if runtime_config[pipeline_pipes[queue_name]]['group'] == 'Parser' and json.loads(msg)['__type'] == 'Event': print('Event converted to Report automatically.') - msg = message.Report(message.MessageFactory.unserialize(msg)).serialize() + msg = message.Report(message.MessageFactory.deserialize(msg)).serialize() else: print(red(f"The given queue '{queue_name}' is not configured. Please retry with a valid queue.")) break diff --git a/intelmq/bots/collectors/amqp/collector_amqp.py b/intelmq/bots/collectors/amqp/collector_amqp.py index 543f4e0bed..d1fb8a4d62 100644 --- a/intelmq/bots/collectors/amqp/collector_amqp.py +++ b/intelmq/bots/collectors/amqp/collector_amqp.py @@ -79,7 +79,7 @@ def process(self): self.logger.exception('Error receiving messages.') else: if self.expect_intelmq_message: - message = MessageFactory.unserialize(body.decode()) + message = MessageFactory.deserialize(body.decode()) self.send_message(message, auto_add=False) else: report = self.new_report() diff --git a/intelmq/bots/outputs/postgresql/output.py b/intelmq/bots/outputs/postgresql/output.py new file mode 100644 index 0000000000..73e82aca66 --- /dev/null +++ b/intelmq/bots/outputs/postgresql/output.py @@ -0,0 +1,25 @@ +# SPDX-FileCopyrightText: 2015 National CyberSecurity Center +# +# SPDX-License-Identifier: AGPL-3.0-or-later + +# -*- coding: utf-8 -*- +""" +Compatibility shim +""" + +from intelmq.bots.outputs.sql.output import SQLOutputBot + + +class PostgreSQLOutputBot(SQLOutputBot): + engine = 'postgresql' + + def init(self): + self.logger.warning("The output bot 'intelmq.bots.outputs.postgresql.output' " + "is deprecated and replaced by " + "'intelmq.bots.outputs.sql.output' with the parameter " + 
"'engine' = 'postgresql'. " + "The fallback compatibility will be removed in version 3.0.") + super().init() + + +BOT = PostgreSQLOutputBot diff --git a/intelmq/bots/outputs/redis/output.py b/intelmq/bots/outputs/redis/output.py index 2e4b12968f..c2440f1097 100644 --- a/intelmq/bots/outputs/redis/output.py +++ b/intelmq/bots/outputs/redis/output.py @@ -41,9 +41,7 @@ def process(self): event = self.receive_message() try: - self.output.lpush(self.queue, - event.to_json(hierarchical=self.hierarchical_output, - with_type=self.with_type)) + self.output.lpush(self.queue, event.to_pack(use_packer=self.use_packer, hierarchical=self.hierarchical, with_type=self.with_type)) except Exception: self.logger.exception('Failed to send message. Reconnecting.') self.connect() diff --git a/intelmq/bots/outputs/udp/output.py b/intelmq/bots/outputs/udp/output.py index 9ab53a1d01..a19de549fe 100644 --- a/intelmq/bots/outputs/udp/output.py +++ b/intelmq/bots/outputs/udp/output.py @@ -37,7 +37,7 @@ def process(self): del event['raw'] if self.format == 'json': - self.send(self.header + event.to_json()) + self.send(self.header + event.to_pack(use_packer=self.format)) elif self.format == 'delimited': self.send(self.delimited(event)) diff --git a/intelmq/bots/parsers/json/parser.py b/intelmq/bots/parsers/json/parser.py index f66a5a7410..8e6e3b040f 100644 --- a/intelmq/bots/parsers/json/parser.py +++ b/intelmq/bots/parsers/json/parser.py @@ -26,9 +26,11 @@ def process(self): lines = [base64_decode(report['raw'])] for line in lines: - new_event = MessageFactory.unserialize(line, + new_event = MessageFactory.deserialize(line, harmonization=self.harmonization, - default_type='Event') + default_type='Event', + use_packer="json") + event = self.new_event(report) event.update(new_event) if 'raw' not in event: diff --git a/intelmq/bots/parsers/malwaredomains/parser.py b/intelmq/bots/parsers/malwaredomains/parser.py new file mode 100644 index 0000000000..c5b90eb81b --- /dev/null +++ 
b/intelmq/bots/parsers/malwaredomains/parser.py @@ -0,0 +1,66 @@ +# SPDX-FileCopyrightText: 2015 National CyberSecurity Center +# +# SPDX-License-Identifier: AGPL-3.0-or-later + +# -*- coding: utf-8 -*- +""" +The descriptions give a hint about what the entry is about and is very mixed. +Most prominent description is "phishing", most of them are malware names. +More types could be mapped better, only the most obvious ones are done currently. +""" +import datetime + +from intelmq.lib import utils +from intelmq.lib.bot import Bot + + +class MalwareDomainsParserBot(Bot): + """Parse the Malware Domains feed""" + + def is_valid_date(self, strd): + try: + datetime.datetime.strptime(strd, '%Y%m%d') + return True + except Exception: + return False + + def process(self): + report = self.receive_message() + + raw_report = utils.base64_decode(report.get("raw")) + + for row in raw_report.splitlines(): + row = row.rstrip() + + if row.startswith("#") or len(row) == 0: + continue + + values = row.split('\t')[1:] + + event = self.new_event(report) + + event.add('source.fqdn', values[1]) + if values[2] == 'phishing': + event.add('classification.identifier', values[2]) + event.add('classification.type', 'phishing') + elif values[2] == 'C&C': + event.add('classification.identifier', values[2]) + event.add('classification.type', 'c2server') + else: + event.add('classification.identifier', values[2]) + event.add('classification.type', 'malware-distribution') + event.add('event_description.text', values[2]) + + for i in range(4, len(values)): + if self.is_valid_date(values[i]): + event.add('time.source', # times are GMT, verified via email + values[i] + "T00:00:00+00:00", overwrite=True) + break + + event.add('raw', row) + + self.send_message(event) + self.acknowledge_message() + + +BOT = MalwareDomainsParserBot diff --git a/intelmq/bots/parsers/shadowserver/_config.py b/intelmq/bots/parsers/shadowserver/_config.py index 6a4521145a..01df0262f4 100644 --- 
a/intelmq/bots/parsers/shadowserver/_config.py +++ b/intelmq/bots/parsers/shadowserver/_config.py @@ -110,7 +110,6 @@ def convert_bool(value: str) -> Optional[bool]: def validate_to_none(value: str) -> Optional[str]: return None if (not value or value in {'0', 'unknown'}) else value - def convert_int(value: str) -> Optional[int]: """ Returns an int or None for empty strings. """ return int(value) if value else None @@ -2520,7 +2519,7 @@ def force_base64(value: Optional[str]) -> Optional[str]: ('extra.', 'device_type', validate_to_none), ('extra.', 'device_model', validate_to_none), ('destination.ip', 'dst_ip', validate_ip), - ('destination.port', 'dst_port'), + ('destination.port', 'dst_port'), ('destination.asn', 'dst_asn', invalidate_zero), ('destination.geolocation.cc', 'dst_geo'), ('destination.geolocation.region', 'dst_region'), @@ -2970,6 +2969,183 @@ def scan_exchange_identifier(field): } } +# https://www.shadowserver.org/what-we-do/network-reporting/honeypot-darknet-events-report/ +event4_honeypot_darknet = { + 'required_fields': [ + ('time.source', 'timestamp', add_UTC_to_timestamp), + ('source.ip', 'src_ip'), + ], + 'optional_fields': [ + ('source.port', 'src_port'), + ('source.asn', 'src_asn', invalidate_zero), + ('source.geolocation.cc', 'src_geo'), + ('source.geolocation.region', 'src_region'), + ('source.geolocation.city', 'src_city'), + ('source.reverse_dns', 'src_hostname'), + ('extra.source.naics', 'src_naics', convert_int), + ('extra.source.sector', 'src_sector', validate_to_none), + ('extra.', 'device_vendor', validate_to_none), + ('extra.', 'device_type', validate_to_none), + ('extra.', 'device_model', validate_to_none), + ('destination.ip', 'dst_ip', validate_ip), + ('destination.port', 'dst_port', convert_int), + ('destination.asn', 'dst_asn', invalidate_zero), + ('destination.geolocation.cc', 'dst_geo'), + ('destination.geolocation.region', 'dst_region'), + ('destination.geolocation.city', 'dst_city'), + ('destination.reverse_dns', 
'dst_hostname'), + ('extra.destination.naics', 'dst_naics', invalidate_zero), + ('extra.destination.sector', 'dst_sector', validate_to_none), + ('extra.', 'public_source', validate_to_none), + ('malware.name', 'infection'), + ('extra.', 'family', validate_to_none), + ('classification.identifier', 'tag'), # different values possible in this report + ('extra.', 'application', validate_to_none), + ('extra.', 'version', validate_to_none), + ('extra.', 'event_id', validate_to_none), + ('extra.', 'count', convert_int), + ], + 'constant_fields': { + 'classification.taxonomy': 'other', + 'classification.type': 'other', + }, +} + +event46_sinkhole_http = { + 'required_fields': [ + ('time.source', 'timestamp', add_UTC_to_timestamp), + ('source.ip', 'src_ip'), + ('source.port', 'src_port'), + ], + 'optional_fields': [ + ('protocol.transport', 'protocol'), + ('source.asn', 'src_asn', invalidate_zero), + ('source.geolocation.cc', 'src_geo'), + ('source.geolocation.region', 'src_region'), + ('source.geolocation.city', 'src_city'), + ('source.reverse_dns', 'src_hostname'), + ('extra.source.naics', 'src_naics', convert_int), + ('extra.source.sector', 'src_sector', validate_to_none), + ('extra.', 'device_vendor', validate_to_none), + ('extra.', 'device_type', validate_to_none), + ('extra.', 'device_model', validate_to_none), + ('destination.ip', 'dst_ip', validate_ip), + ('destination.port', 'dst_port'), + ('destination.asn', 'dst_asn', invalidate_zero), + ('destination.geolocation.cc', 'dst_geo'), + ('destination.geolocation.region', 'dst_region'), + ('destination.geolocation.city', 'dst_city'), + ('destination.reverse_dns', 'dst_hostname'), + ('extra.destination.naics', 'dst_naics', invalidate_zero), + ('extra.destination.sector', 'dst_sector', validate_to_none), + ('extra.', 'public_source', validate_to_none), + ('malware.name', 'infection'), + ('extra.', 'family', validate_to_none), + ('classification.identifier', 'tag'), + ('extra.', 'application', validate_to_none), + 
('extra.', 'version', validate_to_none), + ('extra.', 'event_id', validate_to_none), + ('destination.url', 'http_url', convert_http_host_and_url, True), + ('destination.fqdn', 'http_host', validate_fqdn), + ('extra.', 'http_agent', validate_to_none), + ('extra.', 'forwarded_by', validate_to_none), + ('extra.', 'ssl_cipher', validate_to_none), + ('extra.', 'http_referer', validate_to_none), + ], + 'constant_fields': { + 'classification.taxonomy': 'malicious code', + 'classification.type': 'infected-system', + 'protocol.application': 'http', + }, +} + + +# https://www.shadowserver.org/what-we-do/network-reporting/vulnerable-exchange-server-report/ +def scan_exchange_taxonomy(field): + if field == 'exchange;webshell': + return 'intrusions' + return 'vulnerable' + + +def scan_exchange_type(field): + if field == 'exchange;webshell': + return 'system-compromise' + return 'infected-system' + + +def scan_exchange_identifier(field): + if field == 'exchange;webshell': + return 'exchange-server-webshell' + return 'vulnerable-exchange-server' + + +scan_exchange = { + 'required_fields': [ + ('time.source', 'timestamp', add_UTC_to_timestamp), + ('source.ip', 'ip'), + ('source.port', 'port'), + ], + 'optional_fields': [ + ('source.reverse_dns', 'hostname'), + ('extra.', 'tag'), + ('source.asn', 'asn', invalidate_zero), + ('source.geolocation.cc', 'geo'), + ('source.geolocation.region', 'region'), + ('source.geolocation.city', 'city'), + ('extra.source.naics', 'naics', convert_int), + ('extra.', 'sic', invalidate_zero), + ('extra.source.sector', 'sector', validate_to_none), + ('extra.', 'version', validate_to_none), + ('extra.', 'servername', validate_to_none), + ('classification.taxonomy', 'tag', scan_exchange_taxonomy), + ('classification.type', 'tag', scan_exchange_type), + ('classification.identifier', 'tag', scan_exchange_identifier), + ], + 'constant_fields': { + }, +} + +# https://www.shadowserver.org/what-we-do/network-reporting/sinkhole-http-referer-events-report/ 
+event46_sinkhole_http_referer = { + 'required_fields': [ + ('time.source', 'timestamp', add_UTC_to_timestamp), + ('destination.ip', 'dst_ip', validate_ip), + ('destination.port', 'dst_port'), + ], + 'optional_fields': [ + ('extra.', 'http_referer_ip', validate_ip), + ('extra.', 'http_referer_asn', convert_int), + ('extra.', 'http_referer_geo', validate_to_none), + ('extra.', 'http_referer_region', validate_to_none), + ('extra.', 'http_referer_city', validate_to_none), + ('extra.', 'http_referer_hostname', validate_to_none), + ('extra.', 'http_referer_naics', invalidate_zero), + ('extra.', 'http_referer_sector', validate_to_none), + ('destination.asn', 'dst_asn', invalidate_zero), + ('destination.geolocation.cc', 'dst_geo'), + ('destination.geolocation.region', 'dst_region'), + ('destination.geolocation.city', 'dst_city'), + ('destination.reverse_dns', 'dst_hostname'), + ('extra.destination.naics', 'dst_naics', invalidate_zero), + ('extra.destination.sector', 'dst_sector', validate_to_none), + ('extra.', 'public_source', validate_to_none), + ('malware.name', 'infection'), + ('extra.', 'family', validate_to_none), + ('extra.', 'tag', validate_to_none), + ('extra.', 'application', validate_to_none), + ('extra.', 'version', validate_to_none), + ('extra.', 'event_id', validate_to_none), + ('destination.url', 'http_url', convert_http_host_and_url, True), + ('destination.fqdn', 'http_host', validate_fqdn), + ('extra.', 'http_referer', validate_to_none), + ], + 'constant_fields': { + 'classification.identifier': 'sinkhole-http-referer', + 'classification.taxonomy': 'other', + 'classification.type': 'other', + } +} + mapping = ( # feed name, file name, function ('Accessible-ADB', 'scan_adb', accessible_adb), diff --git a/intelmq/lib/bot.py b/intelmq/lib/bot.py index caed55de28..d719264c27 100644 --- a/intelmq/lib/bot.py +++ b/intelmq/lib/bot.py @@ -101,6 +101,7 @@ class Bot: statistics_host: str = "127.0.0.1" statistics_password: Optional[str] = None statistics_port: int = 
6379 + use_packer: str = os.environ.get('INTELMQ_USE_PACKER', 'MsgPack') _message_processed_verb: str = 'Processed' @@ -329,8 +330,8 @@ def start(self, starting: bool = True, error_on_pipeline: bool = True, self.logger.error('Pipeline failed.') self.__disconnect_pipelines() - except exceptions.DecodingError as exc: - self.logger.exception('Could not decode message from pipeline. No retries useful.') + except exceptions.DeserializationError as exc: + self.logger.exception('Could not deserialize message from pipeline. No retries useful.') # ensure that we do not re-process the faulty message self.__error_retries_counter = self.error_max_retries + 1 @@ -661,7 +662,7 @@ def receive_message(self) -> libmessage.Message: return self.receive_message() try: - self.__current_message = libmessage.MessageFactory.unserialize(message, + self.__current_message = libmessage.MessageFactory.deserialize(message, harmonization=self.harmonization) except exceptions.InvalidKey as exc: # In case a incoming message is malformed an does not conform with the currently @@ -820,7 +821,7 @@ def __init_logger(self): def __log_configuration_parameter(self, config_name: str, option: str, value: Any): if "password" in option or "token" in option: - value = "HIDDEN" + value = "" message = "{} configuration: parameter {!r} loaded with value {!r}." 
\ .format(config_name.title(), option, value) @@ -1318,9 +1319,8 @@ def export_event(self, event: libmessage.Event, if 'raw' in event: del event['raw'] if return_type is str: - return event.to_json(hierarchical=self.hierarchical, - with_type=self.with_type, - jsondict_as_string=self.jsondict_as_string) + return event.to_pack(use_packer=self.use_packer, hierarchical=self.hierarchical, + with_type=self.with_type) else: retval = event.to_dict(hierarchical=self.hierarchical, with_type=self.with_type, diff --git a/intelmq/lib/bot_debugger.py b/intelmq/lib/bot_debugger.py index 0afeaa5c1a..909b2303c8 100644 --- a/intelmq/lib/bot_debugger.py +++ b/intelmq/lib/bot_debugger.py @@ -169,7 +169,7 @@ def outputappend(self, msg): def arg2msg(self, msg): default_type = "Report" if (self.runtime_configuration.get("group", None) == "Parser" or isinstance(self.instance, ParserBot)) else "Event" try: - msg = MessageFactory.unserialize(msg, default_type=default_type) + msg = MessageFactory.deserialize(msg, default_type=default_type) except (Exception, KeyError, TypeError, ValueError) as exc: if exists(msg): with open(msg) as f: diff --git a/intelmq/lib/exceptions.py b/intelmq/lib/exceptions.py index 5c8230d8da..824010e3d6 100644 --- a/intelmq/lib/exceptions.py +++ b/intelmq/lib/exceptions.py @@ -167,3 +167,26 @@ def __init__(self, encodings=None, exception: UnicodeDecodeError = None, suffixes.append('with reason %r' % exception.reason) suffix = (' ' + ' '.join(suffixes)) if suffixes else '' super().__init__("Could not decode string%s." % suffix) + + +class DeserializationError(IntelMQException, ValueError): + """ + Unrecoverable error during message deserialization + """ + def __init__(self, exception: Exception = None, object: bytes = None): + self.object = object + super().__init__("Could not deserialize message, %s." 
% exception) + + +class SerializationError(IntelMQException, ValueError): + """ + Unrecoverable error during message serialization + """ + def __init__(self, exception: Exception = None, object: bytes = None): + self.object = object + super().__init__("Could not serialize message, %s." % exception) + + +class MissingPackerError(IntelMQException): + def __init__(self, packer: str): + super().__init__(f"Could not load '{packer}' as packer, please check intelmq.lib.packers.{packer.lower()} and documentation") diff --git a/intelmq/lib/message.py b/intelmq/lib/message.py index 69137209ac..e5b4460dc9 100644 --- a/intelmq/lib/message.py +++ b/intelmq/lib/message.py @@ -9,6 +9,8 @@ Use MessageFactory to get a Message object (types Report and Event). """ import hashlib +import importlib +import inspect import json import re import warnings @@ -19,6 +21,7 @@ import intelmq.lib.harmonization from intelmq import HARMONIZATION_CONF_FILE from intelmq.lib import utils +from intelmq.lib.packers.packer import Packer __all__ = ['Event', 'Message', 'MessageFactory', 'Report'] VALID_MESSSAGE_TYPES = ('Event', 'Message', 'Report') @@ -28,8 +31,8 @@ class MessageFactory: """ - unserialize: JSON encoded message to object - serialize: object to JSON encoded object + deserialize: packed message to object + serialize: object to packed """ @staticmethod @@ -44,7 +47,7 @@ def from_dict(message: dict, harmonization=None, default_type: If '__type' is not present in message, the given type will be used See also: - MessageFactory.unserialize + MessageFactory.deserialize MessageFactory.serialize """ if default_type and "__type" not in message: @@ -60,8 +63,8 @@ def from_dict(message: dict, harmonization=None, return class_reference(message, auto=True, harmonization=harmonization) @staticmethod - def unserialize(raw_message: str, harmonization: dict = None, - default_type: Optional[str] = None) -> dict: + def deserialize(raw_message: bytes, harmonization: dict = None, + default_type: Optional[str] 
= None, use_packer: str = "MsgPack", **kwargs) -> dict: """ Takes JSON-encoded Message object, returns instance of correct class. @@ -74,19 +77,18 @@ def unserialize(raw_message: str, harmonization: dict = None, MessageFactory.from_dict MessageFactory.serialize """ - message = Message.unserialize(raw_message) + message = Message.deserialize(raw_message, use_packer=use_packer, **kwargs) return MessageFactory.from_dict(message, harmonization=harmonization, default_type=default_type) @staticmethod - def serialize(message): + def serialize(message, use_packer: str = 'MsgPack', **kwargs) -> bytes: """ - Takes instance of message-derived class and makes JSON-encoded Message. + Takes instance of message-derived class and makes packed Message. The class is saved in __type attribute. """ - raw_message = Message.serialize(message) - return raw_message + return Message.serialize(message, use_packer=use_packer, **kwargs) class Message(dict): @@ -127,7 +129,7 @@ def __init__(self, message: Union[dict, tuple] = (), auto: bool = False, elif isinstance(message, tuple): self.iterable = dict(message) else: - raise ValueError("Type %r of message can't be handled, must be dict or tuple.", type(message)) + raise ValueError("Type %r of message can't be handled, must be dict or tuple." 
% type(message)) for key, value in self.iterable.items(): if not self.add(key, value, sanitize=False, raise_failure=False): self.add(key, value, sanitize=True) @@ -306,22 +308,43 @@ def copy(self): return retval def deep_copy(self): - return MessageFactory.unserialize(MessageFactory.serialize(self), + return MessageFactory.deserialize(MessageFactory.serialize(self), harmonization={self.__class__.__name__.lower(): self.harmonization_config}) def __str__(self): - return self.serialize() + return self.serialize(use_packer="JSON") - def serialize(self): - self['__type'] = self.__class__.__name__ - json_dump = utils.decode(json.dumps(self)) - del self['__type'] - return json_dump + def serialize(self, use_packer: str = "MsgPack", **kwargs): + delete_type = False + if '__type' not in self: + delete_type = True + self['__type'] = self.__class__.__name__ + + try: + packer: Packer = inspect.getmembers(importlib.import_module(f'intelmq.lib.packers.{use_packer.lower()}.packer'))[0][1]() + except: + raise exceptions.MissingPackerError(packer=use_packer) + + try: + packed = packer.serialize(data=self, **kwargs) + except Exception as exc: + raise exceptions.SerializationError(exception=exc, object=self) + + if delete_type: + del self['__type'] + return packed @staticmethod - def unserialize(message_string: str): - message = json.loads(message_string) - return message + def deserialize(message: bytes, use_packer: str = "MsgPack", **kwargs): + try: + packer: Packer = inspect.getmembers(importlib.import_module(f'intelmq.lib.packers.{use_packer.lower()}.packer'))[0][1]() + except: + raise exceptions.MissingPackerError(packer=use_packer) + + try: + return packer.deserialize(data=message, **kwargs) + except Exception as exc: + raise exceptions.DeserializationError(exception=exc, object=message) def __is_valid_key(self, key: str): try: @@ -470,13 +493,21 @@ def to_dict(self, hierarchical: bool = False, with_type: bool = False, json_dict_fp = json_dict_fp[subkey] for key, value in 
jsondicts.items(): - new_dict[key] = json.dumps(value, ensure_ascii=False) + new_dict[key] = json.dumps(value) return new_dict - def to_json(self, hierarchical=False, with_type=False, jsondict_as_string=False): - json_dict = self.to_dict(hierarchical=hierarchical, with_type=with_type) - return json.dumps(json_dict, ensure_ascii=False, sort_keys=True) + def to_pack(self, use_packer="MsgPack", hierarchical=False, with_type=False, **kwargs): + try: + packer: Packer = inspect.getmembers(importlib.import_module(f'intelmq.lib.packers.{use_packer.lower()}.packer'))[0][1]() + except: + raise exceptions.MissingPackerError(packer=use_packer) + + try: + data = self.to_dict(hierarchical=hierarchical, with_type=with_type) + return packer.serialize(data, **kwargs) + except Exception as exc: + raise exceptions.SerializationError(exception=exc, object=self) def __eq__(self, other: dict) -> bool: """ diff --git a/intelmq/lib/packers/__init__.py b/intelmq/lib/packers/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/intelmq/lib/packers/json/__init__.py b/intelmq/lib/packers/json/__init__.py new file mode 100644 index 0000000000..99ea3f622f --- /dev/null +++ b/intelmq/lib/packers/json/__init__.py @@ -0,0 +1,5 @@ +# SPDX-FileCopyrightText: 2022 CERT.at GmbH +# +# SPDX-License-Identifier: AGPL-3.0-or-later + +# -*- coding: utf-8 -*- diff --git a/intelmq/lib/packers/json/packer.py b/intelmq/lib/packers/json/packer.py new file mode 100644 index 0000000000..d0ae7a2671 --- /dev/null +++ b/intelmq/lib/packers/json/packer.py @@ -0,0 +1,19 @@ +# SPDX-FileCopyrightText: 2022 CERT.at GmbH +# +# SPDX-License-Identifier: AGPL-3.0-or-later + +# -*- coding: utf-8 -*- + +from intelmq.lib.packers.packer import Packer +import json + + +class JSON(Packer): + def __init__(self) -> None: + super().__init__() + + def serialize(self, data, **kwargs) -> bytes: + return json.dumps(data, **kwargs) + + def deserialize(self, data, **kwargs) -> object: + return json.loads(data, **kwargs) 
diff --git a/intelmq/lib/packers/msgpack/REQUIREMENTS.txt b/intelmq/lib/packers/msgpack/REQUIREMENTS.txt new file mode 100644 index 0000000000..071d9d7489 --- /dev/null +++ b/intelmq/lib/packers/msgpack/REQUIREMENTS.txt @@ -0,0 +1,7 @@ +# SPDX-FileCopyrightText: 2022 CERT.at GmbH +# +# SPDX-License-Identifier: AGPL-3.0-or-later + +# -*- coding: utf-8 -*- + +msgpack>=0.5 diff --git a/intelmq/lib/packers/msgpack/__init__.py b/intelmq/lib/packers/msgpack/__init__.py new file mode 100644 index 0000000000..99ea3f622f --- /dev/null +++ b/intelmq/lib/packers/msgpack/__init__.py @@ -0,0 +1,5 @@ +# SPDX-FileCopyrightText: 2022 CERT.at GmbH +# +# SPDX-License-Identifier: AGPL-3.0-or-later + +# -*- coding: utf-8 -*- diff --git a/intelmq/lib/packers/msgpack/packer.py b/intelmq/lib/packers/msgpack/packer.py new file mode 100644 index 0000000000..259f312bb7 --- /dev/null +++ b/intelmq/lib/packers/msgpack/packer.py @@ -0,0 +1,27 @@ +# SPDX-FileCopyrightText: 2022 CERT.at GmbH +# +# SPDX-License-Identifier: AGPL-3.0-or-later + +# -*- coding: utf-8 -*- + +from intelmq.lib.packers.packer import Packer +from intelmq.lib import exceptions + + +try: + import msgpack +except: + msgpack = None + + +class MsgPack(Packer): + def __init__(self) -> None: + if msgpack is None: + raise exceptions.MissingDependencyError("msgpack") + super().__init__() + + def serialize(self, data, **kwargs) -> bytes: + return msgpack.packb(data, **kwargs) + + def deserialize(self, data, **kwargs) -> object: + return msgpack.unpackb(data, raw=False, **kwargs) diff --git a/intelmq/lib/packers/packer.py b/intelmq/lib/packers/packer.py new file mode 100644 index 0000000000..2f97c62837 --- /dev/null +++ b/intelmq/lib/packers/packer.py @@ -0,0 +1,15 @@ +# SPDX-FileCopyrightText: 2022 CERT.at GmbH +# +# SPDX-License-Identifier: AGPL-3.0-or-later + +# -*- coding: utf-8 -*- + +class Packer(): + def __init__(self) -> None: + pass + + def serialize(self, data: bytes, **kwargs) -> bytes: + raise NotImplementedError() + + 
def deserialize(self, data: bytes, **kwargs) -> object: + raise NotImplementedError() diff --git a/intelmq/lib/pipeline.py b/intelmq/lib/pipeline.py index ba0bf46998..62f8cd3b74 100644 --- a/intelmq/lib/pipeline.py +++ b/intelmq/lib/pipeline.py @@ -125,14 +125,14 @@ def send(self, message: str, path: str = "_default", path_permissive: bool = False): raise NotImplementedError - def receive(self) -> str: + def receive(self) -> bytes: if self._has_message: raise exceptions.PipelineError("There's already a message, first " "acknowledge the existing one.") retval = self._receive() self._has_message = True - return utils.decode(retval) + return retval def _receive(self) -> bytes: raise NotImplementedError diff --git a/intelmq/lib/test.py b/intelmq/lib/test.py index 301ce70383..3e0a9941d2 100644 --- a/intelmq/lib/test.py +++ b/intelmq/lib/test.py @@ -11,6 +11,7 @@ """ import io import inspect +import importlib import json import os import re @@ -22,6 +23,7 @@ import redis import intelmq.lib.message as message +from intelmq.lib.packers.packer import Packer import intelmq.lib.pipeline as pipeline import intelmq.lib.utils as utils from intelmq import CONFIG_DIR, RUNTIME_CONF_FILE @@ -158,8 +160,7 @@ def setUpClass(cls): elif cls.bot_type != 'collector' and cls.default_input_message == '': cls.default_input_message = {'__type': 'Event'} if type(cls.default_input_message) is dict: - cls.default_input_message = \ - utils.decode(json.dumps(cls.default_input_message)) + cls.default_input_message = message.MessageFactory.serialize(cls.default_input_message, os.environ.get('INTELMQ_USE_PACKER', 'MsgPack')) if cls.use_cache and not os.environ.get('INTELMQ_SKIP_REDIS'): password = os.environ.get('INTELMQ_TEST_REDIS_PASSWORD') or \ @@ -176,10 +177,10 @@ def setUpClass(cls): harmonization = utils.load_configuration(pkg_resources.resource_filename('intelmq', 'etc/harmonization.conf')) - def new_report(self, auto=False, examples=False): + def new_report(self, auto=False, examples=False) 
-> message.Report: return message.Report(harmonization=self.harmonization, auto=auto) - def new_event(self): + def new_event(self) -> message.Event: return message.Event(harmonization=self.harmonization) def get_mocked_logger(self, logger): @@ -247,7 +248,7 @@ def prepare_source_queue(self): self.input_queue = [] for msg in self.input_message: if type(msg) is dict: - self.input_queue.append(json.dumps(msg)) + self.input_queue.append(message.MessageFactory.serialize(msg)) elif issubclass(type(msg), message.Message): self.input_queue.append(msg.serialize()) else: @@ -331,8 +332,8 @@ def run_bot(self, iterations: int = 1, error_on_pipeline: bool = False, """ Test if report has required fields. """ if self.bot_type == 'collector': - for report_json in self.get_output_queue(): - report = message.MessageFactory.unserialize(report_json, + for report_data in self.get_output_queue(): + report = message.MessageFactory.deserialize(report_data, harmonization=self.harmonization) self.assertIsInstance(report, message.Report) self.assertIn('raw', report) @@ -340,8 +341,8 @@ def run_bot(self, iterations: int = 1, error_on_pipeline: bool = False, """ Test if event has required fields. """ if self.bot_type == 'parser': - for event_json in self.get_output_queue(): - event = message.MessageFactory.unserialize(event_json, + for event_data in self.get_output_queue(): + event = message.MessageFactory.deserialize(event_data, harmonization=self.harmonization) self.assertIsInstance(event, message.Event) self.assertIn('classification.type', event) @@ -408,7 +409,7 @@ def get_output_queue(self, path="_default"): """Getter for items in the output queues of this bot. Use in TestCase scenarios If there is multiple queues in named queue group, we return all the items chained. 
""" - return [utils.decode(text) for text in chain(*[self.pipe.state[x] for x in self.pipe.destination_queues[path]])] + return [text for text in chain(*[self.pipe.state[x] for x in self.pipe.destination_queues[path]])] # return [utils.decode(text) for text in self.pipe.state["%s-output" % self.bot_id]] def test_bot_name(self, *args, **kwargs): @@ -532,16 +533,15 @@ def assertOutputQueueLen(self, queue_len=0, path="_default"): """ self.assertEqual(len(self.get_output_queue(path=path)), queue_len) - def assertMessageEqual(self, queue_pos, expected_msg, compare_raw=True, path="_default"): + def assertMessageEqual(self, queue_pos, expected_msg, compare_raw=True, path="_default", use_packer=os.environ.get('INTELMQ_USE_PACKER', 'MsgPack')): """ Asserts that the given expected_message is contained in the generated event with given queue position. """ event = self.get_output_queue(path=path)[queue_pos] - self.assertIsInstance(event, str) - - event_dict = json.loads(event) + self.assertIsInstance(event, bytes) + event_dict = message.MessageFactory.deserialize(raw_message=event, use_packer=use_packer) if isinstance(expected_msg, (message.Event, message.Report)): expected = expected_msg.to_dict(with_type=True) else: diff --git a/intelmq/lib/upgrades.py b/intelmq/lib/upgrades.py index 83dd289232..80ad97bd7c 100644 --- a/intelmq/lib/upgrades.py +++ b/intelmq/lib/upgrades.py @@ -610,7 +610,7 @@ def v230_feed_changes(configuration, harmonization, dry_run, **kwargs): def v300_bots_file_removal(configuration, harmonization, dry_run, **kwargs): """ - Remove BOTS file + Migrate Abuse.ch Feodotracker Browser feed parsing parameters """ changed = None messages = [] diff --git a/intelmq/tests/bots/collectors/tcp/test_collector.py b/intelmq/tests/bots/collectors/tcp/test_collector.py index 117dd9316b..19b8e1c618 100644 --- a/intelmq/tests/bots/collectors/tcp/test_collector.py +++ b/intelmq/tests/bots/collectors/tcp/test_collector.py @@ -96,7 +96,8 @@ def set_bot(cls): cls.sysconfig = 
{'http_url': 'http://localhost/two_files.tar.gz', 'extract_files': True, 'name': 'Example feed', 'ip': 'localhost', - 'port': PORT + 'port': PORT, + "use_packer": "json" } def test_random_input(self): @@ -105,7 +106,7 @@ def test_random_input(self): thread.start() self.run_bot() self.assertOutputQueueLen(2) - generated_report = MessageFactory.unserialize(self.get_output_queue()[1], harmonization=self.harmonization, + generated_report = MessageFactory.deserialize(self.get_output_queue()[1], harmonization=self.harmonization, default_type='Event') self.assertEqual(base64_decode(generated_report['raw']), ORIGINAL_DATA.split(SEPARATOR)[1]) @@ -124,9 +125,11 @@ def test_intelmq_exchange(self): self.assertOutputQueueLen(msg_count) for i, msg in enumerate(self.get_output_queue()): - report = MessageFactory.unserialize(msg, harmonization=self.harmonization, default_type='Event') + report = MessageFactory.deserialize(msg, harmonization=self.harmonization, default_type='Event') - output = MessageFactory.unserialize(utils.base64_decode(report["raw"]), harmonization=self.harmonization, default_type='Event') + output = MessageFactory.deserialize(utils.base64_decode(report["raw"]), + harmonization=self.harmonization, + default_type='Event') self.assertDictEqual(output, INPUT1) del report['time.observation'] diff --git a/intelmq/tests/bots/experts/cymru_whois/test_expert.py b/intelmq/tests/bots/experts/cymru_whois/test_expert.py index 8d109e694d..f5d911df9e 100644 --- a/intelmq/tests/bots/experts/cymru_whois/test_expert.py +++ b/intelmq/tests/bots/experts/cymru_whois/test_expert.py @@ -3,11 +3,12 @@ # SPDX-License-Identifier: AGPL-3.0-or-later # -*- coding: utf-8 -*- -import json +import os import unittest import intelmq.lib.test as test from intelmq.bots.experts.cymru_whois.expert import CymruExpertBot +from intelmq.lib.message import MessageFactory EXAMPLE_INPUT = {"__type": "Event", "source.ip": "78.104.144.2", # example.com @@ -93,7 +94,7 @@ def test_6to4_result(self): """ 
self.input_message = EXAMPLE_6TO4_INPUT self.run_bot() - actual = json.loads(self.get_output_queue()[0]) + actual = MessageFactory.deserialize(self.get_output_queue()[0], use_packer=os.environ.get('INTELMQ_USE_PACKER', 'MsgPack')) self.assertDictContainsSubset(EXAMPLE_6TO4_INPUT, actual) self.assertIn("source.asn", actual) self.assertIn("source.as_name", actual) diff --git a/intelmq/tests/bots/experts/idea/test_expert.py b/intelmq/tests/bots/experts/idea/test_expert.py index 53c1f392d7..2be06ce591 100644 --- a/intelmq/tests/bots/experts/idea/test_expert.py +++ b/intelmq/tests/bots/experts/idea/test_expert.py @@ -3,10 +3,12 @@ # SPDX-License-Identifier: AGPL-3.0-or-later # -*- coding: utf-8 -*- +import os import unittest import json import intelmq.lib.test as test +from intelmq.lib.message import MessageFactory from intelmq.bots.experts.idea.expert import IdeaExpertBot from intelmq.lib.harmonization import ClassificationType @@ -86,10 +88,10 @@ def test_conversion(self): # The ID in the generated Idea event is random, so we have to extract # the data from the "output" field and compare after removing ID's event = self.get_output_queue()[0] - self.assertIsInstance(event, str) - event_dict = json.loads(event) + self.assertIsInstance(event, bytes) + event_dict = MessageFactory.deserialize(event, use_packer=os.environ.get('INTELMQ_USE_PACKER', 'MsgPack')) self.assertIsInstance(event_dict, dict) - self.assertTrue("output" in event_dict) + self.assertTrue("output" in event_dict) idea_event = json.loads(event_dict["output"]) self.assertIsInstance(idea_event, dict) del TEST_OUTPUT1["ID"] diff --git a/intelmq/tests/bots/experts/rdap/test_expert.py b/intelmq/tests/bots/experts/rdap/test_expert.py index 59564c3b35..dea0232fab 100644 --- a/intelmq/tests/bots/experts/rdap/test_expert.py +++ b/intelmq/tests/bots/experts/rdap/test_expert.py @@ -67,7 +67,11 @@ def set_bot(cls): def test(self, mocker): prepare_mocker(mocker) self.input_message = EXAMPLE_INPUT - self.run_bot() + 
self.run_bot(parameters={ + 'rdap_bootstrapped_servers': { + 'versicherung': 'http://localhost/rdap/v1/', + } + }) self.assertMessageEqual(0, EXAMPLE_OUTPUT) def test_object_validation(self, mocker): diff --git a/intelmq/tests/bots/outputs/file/test_output.py b/intelmq/tests/bots/outputs/file/test_output.py index a585a24a03..50ce381927 100644 --- a/intelmq/tests/bots/outputs/file/test_output.py +++ b/intelmq/tests/bots/outputs/file/test_output.py @@ -18,7 +18,9 @@ def set_bot(cls): cls.bot_reference = FileOutputBot cls.os_fp, cls.filename = tempfile.mkstemp() cls.sysconfig = {"hierarchical_output": True, - "file": cls.filename} + "file": cls.filename, + "use_packer": "json", + } def test_event(self): self.run_bot() diff --git a/intelmq/tests/bots/outputs/files/test_output.py b/intelmq/tests/bots/outputs/files/test_output.py index dba47b6582..1793595565 100644 --- a/intelmq/tests/bots/outputs/files/test_output.py +++ b/intelmq/tests/bots/outputs/files/test_output.py @@ -48,7 +48,9 @@ def test_event_whole(self): "dir": self.incoming_path, "tmp": self.tmp_path, "single_key": None, - "suffix": ""} + "suffix": "", + "use_packer": "json", + } self.run_bot() name = os.listdir(self.incoming_path)[0] with open(pth.join(self.incoming_path, name), encoding="utf-8") as f: diff --git a/intelmq/tests/bots/outputs/redis/test_output.py b/intelmq/tests/bots/outputs/redis/test_output.py index e47360e6d7..91ddf15d56 100644 --- a/intelmq/tests/bots/outputs/redis/test_output.py +++ b/intelmq/tests/bots/outputs/redis/test_output.py @@ -50,7 +50,9 @@ def set_bot(cls): "redis_db": 4, "redis_queue": "test-redis-output-queue", "redis_password": os.getenv('INTELMQ_TEST_REDIS_PASSWORD'), - "redis_timeout": "50000"} + "redis_timeout": "50000", + "use_packer": 'json', + } @test.skip_redis() def test_event(self): diff --git a/intelmq/tests/bots/outputs/redis/test_output_as_hierarchical_json.py b/intelmq/tests/bots/outputs/redis/test_output_as_hierarchical_json.py index 28ceb525fd..241120296d 
100644 --- a/intelmq/tests/bots/outputs/redis/test_output_as_hierarchical_json.py +++ b/intelmq/tests/bots/outputs/redis/test_output_as_hierarchical_json.py @@ -12,6 +12,7 @@ import intelmq.lib.test as test import intelmq.lib.utils as utils +from intelmq.lib.message import MessageFactory from intelmq.bots.outputs.redis.output import RedisOutputBot EXAMPLE_EVENT = {"classification.type": "infected-system", @@ -37,45 +38,45 @@ "W50cnlfbmFtZSI6IkRvbWluaWNhbiBSZXB1YmxpYyJ9fQ==" } EXAMPLE_EVENT_JSON = { - "feed": { - "url": "http://alerts.bitsighttech.com:8080/stream?", - "name": "BitSight", - "accuracy": 100.0 - }, - "malware": { - "name": "salityp2p" - }, - "time": { - "observation": "2016-04-19T23:16:08+00:00", - "source": "2016-04-19T23:16:08+00:00" - }, - "raw": "eyJ0cm9qYW5mYW1pbHkiOiJTYWxpdHlwMnAiLCJlbnYiOnsic" - "mVtb3RlX2FkZHIiOiIxNTIuMTY2LjExOS4yIiwicmVtb3RlX3" - "BvcnQiOiI2NTExOCIsInNlcnZlcl9hZGRyIjoiNTIuMTguMTk" - "2LjE2OSIsInNlcnZlcl9wb3J0IjoiOTc5NiJ9LCJfdHMiOjE0" - "NjExMDc3NjgsIl9nZW9fZW52X3JlbW90ZV9hZGRyIjp7ImNvd" - "W50cnlfbmFtZSI6IkRvbWluaWNhbiBSZXB1YmxpYyJ9fQ==", - "classification": { - "type": "infected-system" - }, - "destination": { - "port": 9796, - "ip": "52.18.196.169" - }, - "extra": { - "non_ascii": "ççãããã\x80\ua000 \164 \x80\x80 abcd \165\166" - }, - "event_description": { - "text": "Sinkhole attempted connection" - }, - "source": { - "port": 65118, - "geolocation": { - "country": "Dominican Republic" - }, - "ip": "152.166.119.2" - } - } + "feed": { + "url": "http://alerts.bitsighttech.com:8080/stream?", + "name": "BitSight", + "accuracy": 100.0 + }, + "malware": { + "name": "salityp2p" + }, + "time": { + "observation": "2016-04-19T23:16:08+00:00", + "source": "2016-04-19T23:16:08+00:00" + }, + "raw": "eyJ0cm9qYW5mYW1pbHkiOiJTYWxpdHlwMnAiLCJlbnYiOnsic" + "mVtb3RlX2FkZHIiOiIxNTIuMTY2LjExOS4yIiwicmVtb3RlX3" + "BvcnQiOiI2NTExOCIsInNlcnZlcl9hZGRyIjoiNTIuMTguMTk" + "2LjE2OSIsInNlcnZlcl9wb3J0IjoiOTc5NiJ9LCJfdHMiOjE0" + 
"NjExMDc3NjgsIl9nZW9fZW52X3JlbW90ZV9hZGRyIjp7ImNvd" + "W50cnlfbmFtZSI6IkRvbWluaWNhbiBSZXB1YmxpYyJ9fQ==", + "classification": { + "type": "infected-system" + }, + "destination": { + "port": 9796, + "ip": "52.18.196.169" + }, + "extra": { + "non_ascii": "ççãããã\x80\ua000 \164 \x80\x80 abcd \165\166" + }, + "event_description": { + "text": "Sinkhole attempted connection" + }, + "source": { + "port": 65118, + "geolocation": { + "country": "Dominican Republic" + }, + "ip": "152.166.119.2" + } +} class TestRedisOutputBot(test.BotTestCase, unittest.TestCase): @@ -84,13 +85,14 @@ class TestRedisOutputBot(test.BotTestCase, unittest.TestCase): def set_bot(cls): cls.bot_reference = RedisOutputBot cls.default_input_message = EXAMPLE_EVENT - cls.sysconfig = {"redis_server_ip": os.getenv('INTELMQ_PIPELINE_HOST', 'localhost'), + cls.sysconfig = {"redis_server_ip": os.getenv('INTELMQ_PIPELINE_HOST', '127.0.0.1'), "redis_server_port": 6379, "redis_db": 4, "redis_queue": "test-redis-output-queue", "redis_password": os.getenv('INTELMQ_TEST_REDIS_PASSWORD'), "redis_timeout": "50000", "hierarchical_output": True, + "use_packer": 'json', "with_type": False, } @@ -118,9 +120,9 @@ def test_event(self): # Get the message from Redis event = utils.decode(redis_output.lpop(redis_queue)) - self.assertIsInstance(event, str) event_dict = json.loads(event) + MessageFactory.serialize() self.assertDictEqual(EXAMPLE_EVENT_JSON, event_dict) diff --git a/intelmq/tests/bots/parsers/json/test_parser.py b/intelmq/tests/bots/parsers/json/test_parser.py index c18d18dd00..38f97ec518 100644 --- a/intelmq/tests/bots/parsers/json/test_parser.py +++ b/intelmq/tests/bots/parsers/json/test_parser.py @@ -51,7 +51,6 @@ NO_DEFAULT_EVENT = MULTILINE_EVENTS[1].copy() NO_DEFAULT_EVENT['raw'] = base64.b64encode(b'{"source.ip": "127.0.0.2", "classification.type": "c2-server"}\n').decode() - class TestJSONParserBot(test.BotTestCase, unittest.TestCase): """ A TestCase for a MalwareDomainListParserBot. 
diff --git a/intelmq/tests/bots/parsers/malwaredomains/domains.txt.license b/intelmq/tests/bots/parsers/malwaredomains/domains.txt.license new file mode 100644 index 0000000000..f0b62ad2d3 --- /dev/null +++ b/intelmq/tests/bots/parsers/malwaredomains/domains.txt.license @@ -0,0 +1,2 @@ +SPDX-FileCopyrightText: 2016 Sebastian Wagner +SPDX-License-Identifier: AGPL-3.0-or-later diff --git a/intelmq/tests/bots/parsers/malwaredomains/test_parser.py b/intelmq/tests/bots/parsers/malwaredomains/test_parser.py new file mode 100644 index 0000000000..d6ba00605a --- /dev/null +++ b/intelmq/tests/bots/parsers/malwaredomains/test_parser.py @@ -0,0 +1,56 @@ +# SPDX-FileCopyrightText: 2015 Sebastian Wagner +# +# SPDX-License-Identifier: AGPL-3.0-or-later + +# -*- coding: utf-8 -*- +import base64 +import os +import unittest + +import intelmq.lib.test as test +from intelmq.bots.parsers.malwaredomains.parser import MalwareDomainsParserBot + +with open(os.path.join(os.path.dirname(__file__), 'domains.txt'), 'rb') as fh: + RAW = base64.b64encode(fh.read()).decode() + +OUTPUT1 = {'__type': 'Event', + 'classification.type': 'phishing', + 'event_description.text': 'phishing', + 'classification.identifier': 'phishing', + 'raw': 'CQlleGFtcGxlLmNvbQlwaGlzaGluZwlvcGVucGhpc2guY29tCTIwMTYwNTI3CTIwMTYwMTA4', + 'source.fqdn': 'example.com', + 'time.source': '2016-05-27T00:00:00+00:00'} +OUTPUT2 = {'__type': 'Event', + 'classification.type': 'phishing', + 'event_description.text': 'phishing', + 'classification.identifier': 'phishing', + 'raw': 'CQlleGFtcGxlLmludmFsaWQJcGhpc2hpbmcJb3BlbnBoaXNoLmNvbQkyMDE2MDUyNwkyMDE2MDEwOA==', + 'source.fqdn': 'example.invalid', + 'time.source': '2016-05-27T00:00:00+00:00'} +OUTPUT3 = {'__type': 'Event', + 'classification.type': 'c2-server', + 'event_description.text': 'C&C', + 'classification.identifier': 'C&C', + 'raw': 'CQlleGFtcGxlLm5ldAlDJkMJc291cmNlLmV4YW1wbGUuY29tCTIwMTcxMjAxCTIwMTYwNzE5CTIwMTYwMzEw', + 'source.fqdn': 'example.net', + 'time.source': 
'2017-12-01T00:00:00+00:00'} + + +class TestMalwareDomainsParserBot(test.BotTestCase, unittest.TestCase): + """ + A TestCase for MalwareDomainsParserBot. + """ + + @classmethod + def set_bot(cls): + cls.bot_reference = MalwareDomainsParserBot + cls.default_input_message = {'__type': 'Report', 'raw': RAW} + + def test_event(self): + self.run_bot() + self.assertMessageEqual(0, OUTPUT1) + self.assertMessageEqual(1, OUTPUT2) + self.assertMessageEqual(2, OUTPUT3) + +if __name__ == '__main__': # pragma: no cover + unittest.main() diff --git a/intelmq/tests/bots/parsers/shadowserver/test_broken.py b/intelmq/tests/bots/parsers/shadowserver/test_broken.py index 64bd342f37..f4bea7bcfa 100644 --- a/intelmq/tests/bots/parsers/shadowserver/test_broken.py +++ b/intelmq/tests/bots/parsers/shadowserver/test_broken.py @@ -53,7 +53,7 @@ def test_broken(self): self.assertLogMatches(pattern="Detected report's file name: 'scan_http'.", levelname="DEBUG") self.assertLogMatches(pattern="Failed to parse line.") - self.assertLogMatches(pattern="ValueError: Required column 'timestamp' not found in feed 'Accessible-HTTP'. Possible change in data format or misconfiguration.") + self.assertLogMatches(pattern="ValueError: Required column 'timestamp' not found in feed 'Vulnerable-HTTP'. 
Possible change in data format or misconfiguration.") self.assertLogMatches(pattern=r"Sent 0 events and found 1 problem\(s\)\.", levelname="INFO") diff --git a/intelmq/tests/bots/parsers/shadowserver/test_event4_ip_spoofer.py b/intelmq/tests/bots/parsers/shadowserver/test_event4_ip_spoofer.py index e993ea534f..462746f652 100644 --- a/intelmq/tests/bots/parsers/shadowserver/test_event4_ip_spoofer.py +++ b/intelmq/tests/bots/parsers/shadowserver/test_event4_ip_spoofer.py @@ -1,7 +1,6 @@ # SPDX-FileCopyrightText: 2021 Birger Schacht # # SPDX-License-Identifier: AGPL-3.0-or-later - import os import unittest diff --git a/intelmq/tests/bots/parsers/shadowserver/test_event4_sinkhole_http_referer.py b/intelmq/tests/bots/parsers/shadowserver/test_event4_sinkhole_http_referer.py index 0e3e3cf8a1..62420cdc1e 100644 --- a/intelmq/tests/bots/parsers/shadowserver/test_event4_sinkhole_http_referer.py +++ b/intelmq/tests/bots/parsers/shadowserver/test_event4_sinkhole_http_referer.py @@ -1,6 +1,3 @@ -# SPDX-FileCopyrightText: 2021 Mikk Margus Möll -# -# SPDX-License-Identifier: AGPL-3.0-or-later # -*- coding: utf-8 -*- import os diff --git a/intelmq/tests/lib/test_bot.py b/intelmq/tests/lib/test_bot.py index b7b16192aa..8b2d8b2c44 100644 --- a/intelmq/tests/lib/test_bot.py +++ b/intelmq/tests/lib/test_bot.py @@ -61,7 +61,7 @@ def test_encoding_error_on_input_message(self): """ self.input_message = b'foo\xc9bar' self.run_bot(iterations=1, allowed_error_count=1) - self.assertLogMatches(r'.*intelmq\.lib\.exceptions\.DecodingError:.*') + self.assertLogMatches(r'.*intelmq\.lib\.exceptions\.DeserializationError:.*') self.assertEqual(self.pipe.state['test-bot-input-internal'], []) self.assertEqual(self.pipe.state['test-bot-input'], []) self.assertEqual(self.pipe.state['test-bot-output'], []) @@ -71,7 +71,7 @@ def test_invalid_value_on_input_message(self): Test if the bot is dumping / not retrying a message which is impossible to parse. 
https://github.com/certtools/intelmq/issues/1765 """ - self.input_message = b'{"source.asn": 0, "__type": "Event"}' + self.input_message = {"source.asn": 0, "__type": "Event"} self.run_bot(iterations=1, allowed_error_count=1) self.assertLogMatches(r'.*intelmq\.lib\.exceptions\.InvalidValue:.*') self.assertEqual(self.pipe.state['test-bot-input-internal'], []) diff --git a/intelmq/tests/lib/test_bot_output.py b/intelmq/tests/lib/test_bot_output.py index 673b5b9c10..ee8278a63d 100644 --- a/intelmq/tests/lib/test_bot_output.py +++ b/intelmq/tests/lib/test_bot_output.py @@ -35,23 +35,23 @@ def process(self): class TestDummyOutputBot(BotTestCase, TestCase): @classmethod def set_bot(cls): - cls.sysconfig = {"return_type": None} + cls.sysconfig = {"return_type": None, "use_packer": "json"} cls.bot_reference = DummyOutputBot cls.default_input_message = RAW cls.allowed_error_count = 1 def test_export_raw(self): - self.run_bot(parameters={"single_key": "raw"}) + self.run_bot(parameters={"single_key": "raw", "use_packer": "json"}) self.assertEqual(self.bot.result, "\n") def test_export_output_dict(self): self.input_message = OUTPUT_DICT - self.run_bot(parameters={"single_key": "output"}) + self.run_bot(parameters={"single_key": "output", "use_packer": "json"}) self.assertEqual(self.bot.result, DICT) def test_export_output_dict_string(self): self.input_message = OUTPUT_DICT - self.run_bot(parameters={"single_key": "output", "return_type": str}) + self.run_bot(parameters={"single_key": "output", "return_type": str, "use_packer": "json"}) self.assertEqual(self.bot.result, OUTPUT_DICT['output']) def test_export_output_string(self): @@ -61,17 +61,17 @@ def test_export_output_string(self): def test_export_output_string_string(self): self.input_message = OUTPUT_STRING - self.run_bot(parameters={"single_key": "output", "return_type": str}) + self.run_bot(parameters={"single_key": "output", "return_type": str, "use_packer": "json"}) self.assertEqual(self.bot.result, STRING) def 
test_export_output_int(self): self.input_message = OUTPUT_INT - self.run_bot(parameters={"single_key": "output"}) + self.run_bot(parameters={"single_key": "output", "use_packer": "json"}) self.assertEqual(self.bot.result, INT) def test_export_output_int_string(self): self.input_message = OUTPUT_INT - self.run_bot(parameters={"single_key": "output", "return_type": str}) + self.run_bot(parameters={"single_key": "output", "return_type": str, "use_packer": "json"}) self.assertEqual(self.bot.result, OUTPUT_INT['output']) def test_export_keep_raw_hierarchical(self): @@ -79,6 +79,7 @@ def test_export_keep_raw_hierarchical(self): self.run_bot(parameters={"keep_raw_field": True, "message_hierarchical": True, "message_with_type": False, + "use_packer": "json", }) self.assertEqual(self.bot.result, RAW_HIERARCHICAL) @@ -88,6 +89,7 @@ def test_export_keep_raw_hierarchical_string(self): "message_hierarchical": True, "message_with_type": False, "return_type": str, + "use_packer": "json", }) self.assertEqual(self.bot.result, dumps(RAW_HIERARCHICAL, sort_keys=True)) @@ -96,5 +98,6 @@ def test_export_now_raw_type(self): self.input_message = INPUT self.run_bot(parameters={"keep_raw_field": False, "message_with_type": True, + "use_packer": "json" }) self.assertEqual(self.bot.result, NO_RAW_TYPE) diff --git a/intelmq/tests/lib/test_exceptions.py b/intelmq/tests/lib/test_exceptions.py index f0ffbbb90f..236243b231 100755 --- a/intelmq/tests/lib/test_exceptions.py +++ b/intelmq/tests/lib/test_exceptions.py @@ -66,6 +66,10 @@ def test_MissingDependencyError(self): exc = str(excs.MissingDependencyError(depname, additional_text=additional)) self.assertIn(repr(depname), exc) self.assertTrue(exc.endswith(" %s" % additional)) + + def test_MissingPackerError(self): + exc = str(excs.MissingPackerError('non_existing_packer')) + self.assertIn(repr('non_existing_packer'), exc) if __name__ == '__main__': # pragma: no cover diff --git a/intelmq/tests/lib/test_message.py 
b/intelmq/tests/lib/test_message.py index 9f9c2ddb4a..b995030c4e 100644 --- a/intelmq/tests/lib/test_message.py +++ b/intelmq/tests/lib/test_message.py @@ -10,7 +10,9 @@ Most tests are performed on Report, as it is formally the same as Message, but has a valid Harmonization configuration. """ +from cmath import exp import json +import os import unittest import pkg_resources @@ -158,12 +160,20 @@ def test_event_ne_different_config(self): def test_invalid_type(self): """ Test if Message raises InvalidArgument for invalid type. """ with self.assertRaises(exceptions.InvalidArgument): - message.MessageFactory.unserialize('{"__type": "Message"}', harmonization=HARM) + data = message.MessageFactory.serialize({"__type": "Message"}, use_packer=os.environ.get('INTELMQ_USE_PACKER', 'MsgPack')) + message.MessageFactory.deserialize(data, harmonization=HARM, use_packer=os.environ.get('INTELMQ_USE_PACKER', 'MsgPack')) def test_invalid_type2(self): """ Test if MessageFactory raises InvalidArgument for invalid type. """ with self.assertRaises(exceptions.InvalidArgument): - message.MessageFactory.unserialize('{"__type": "Invalid"}', harmonization=HARM) + data = message.MessageFactory.serialize({"__type": "Invalid"}, use_packer=os.environ.get('INTELMQ_USE_PACKER', 'MsgPack')) + message.MessageFactory.deserialize(data, harmonization=HARM, use_packer=os.environ.get('INTELMQ_USE_PACKER', 'MsgPack')) + + def test_missing_packer(self): + """ Test if MessageFactory raises MissingPackerError if a non existing packer is being used. """ + with self.assertRaises(exceptions.MissingPackerError): + data = message.MessageFactory.serialize({"__type": "Invalid"}, use_packer='non_existing_packer') + message.MessageFactory.deserialize(data, harmonization=HARM, use_packer='non_existing_packer') def test_report_invalid_key(self): """ Test if report raises InvalidKey for invalid key in add(). 
""" @@ -364,11 +374,17 @@ def test_factory_serialize(self): report.add('feed.name', 'Example') report.add('feed.url', URL_SANE) report.add('raw', LOREM_BASE64, sanitize=False) - actual = message.MessageFactory.serialize(report) - expected = ('{"raw": "bG9yZW0gaXBzdW0=", "__type": "Report", "feed.url' - '": "https://example.com/", "feed.name": "Example"}') - self.assertDictEqual(json.loads(expected), - json.loads(actual)) + actual = message.MessageFactory.serialize(report, use_packer=os.environ.get('INTELMQ_USE_PACKER', 'MsgPack')) + expected = message.MessageFactory.serialize({ + 'feed.name': 'Example', + 'feed.url': 'https://example.com/', + 'raw': 'bG9yZW0gaXBzdW0=', + '__type': 'Report', + }, use_packer=os.environ.get('INTELMQ_USE_PACKER', 'MsgPack')) + self.assertDictEqual( + message.MessageFactory.deserialize(expected, use_packer=os.environ.get('INTELMQ_USE_PACKER', 'MsgPack')), + message.MessageFactory.deserialize(actual, use_packer=os.environ.get('INTELMQ_USE_PACKER', 'MsgPack')) + ) def test_deep_copy_content(self): """ Test if deep_copy does return the same items. """ @@ -496,45 +512,11 @@ def test_event_dict_hierarchical(self): '00:00'}}, event.to_dict(hierarchical=True)) - def test_event_json(self): - """ Test Event to_json. """ - event = self.new_event() - event = self.add_event_examples(event) - actual = event.to_json() - self.assertIsInstance(actual, str) - expected = ('{"feed.url": "https://example.com/", "feed.name": ' - '"Example", "raw": "bG9yZW0gaXBzdW0=", "time.observation": ' - '"2015-01-01T13:37:00+00:00"}') - self.assertDictEqual(json.loads(expected), json.loads(actual)) - - def test_event_json_hierarchical(self): - """ Test Event to_json. 
""" - event = self.new_event() - event = self.add_event_examples(event) - actual = event.to_json(hierarchical=True) - self.assertIsInstance(actual, str) - expected = ('{"feed": {"url": "https://example.com/", "name": ' - '"Example"}, "raw": "bG9yZW0gaXBzdW0=", "time": ' - '{"observation": "2015-01-01T13:37:00+00:00"}}') - self.assertDictEqual(json.loads(expected), json.loads(actual)) - def test_event_serialize(self): """ Test Event serialize. """ event = self.new_event() - self.assertEqual('{"__type": "Event"}', - event.serialize()) - - def test_event_string(self): - """ Test Event serialize. """ - event = self.new_event() - self.assertEqual('{"__type": "Event"}', - event.serialize()) - - def test_event_unicode(self): - """ Test Event serialize. """ - event = self.new_event() - self.assertEqual('{"__type": "Event"}', - event.serialize()) + expected = message.MessageFactory.serialize({'__type': 'Event'}, use_packer=os.environ.get('INTELMQ_USE_PACKER', 'MsgPack')) + self.assertEqual(expected, event.serialize()) def test_event_from_report(self): """ Data from report should be in event, except for extra. """ @@ -599,9 +581,9 @@ def test_event_init_check_tuple(self): def test_event_init(self): """ Test if initialization method checks fields. """ - event = '{"__type": "Event", "source.asn": "foo"}' + event = message.MessageFactory.serialize({"__type": "Event", "source.asn": "foo"}, use_packer=os.environ.get('INTELMQ_USE_PACKER', 'MsgPack')) with self.assertRaises(exceptions.InvalidValue): - message.MessageFactory.unserialize(event, harmonization=HARM) + message.MessageFactory.deserialize(event, harmonization=HARM) def test_malware_hash_md5(self): """ Test if MD5 is checked correctly. 
""" @@ -696,11 +678,9 @@ def test_message_extra_set_oldstyle_dict_overwrite_empty(self): """ event = self.new_event() event["extra"] = {"a": {"x": 1}, "b": "foo"} - self.assertEqual(json.loads(event['extra']), - {"a": {"x": 1}, "b": "foo"}) + self.assertEqual(json.loads(event['extra']), {"a": {"x": 1}, "b": "foo"}) event.add("extra", {"a": {}}, overwrite=True) - self.assertEqual(json.loads(event['extra']), - {"a": {}}) + self.assertEqual(json.loads(event['extra']), {"a": {}}) def test_message_extra_set_dict_empty(self): """ @@ -708,8 +688,7 @@ def test_message_extra_set_dict_empty(self): """ event = self.new_event() event.add('extra', {"foo": ''}) - self.assertEqual(json.loads(event['extra']), - {"foo": ''}) + self.assertEqual(json.loads(event["extra"]), {"foo": ""}) def test_message_extra_in_backwardcomp(self): """ diff --git a/intelmq/tests/lib/test_parser_bot.py b/intelmq/tests/lib/test_parser_bot.py index b5ec40668b..35b8b334a0 100644 --- a/intelmq/tests/lib/test_parser_bot.py +++ b/intelmq/tests/lib/test_parser_bot.py @@ -233,8 +233,8 @@ def set_bot(cls): def test_event(self): self.run_bot() - self.assertMessageEqual(0, EXAMPLE_JSON_STREAM_EVENTS[0]) - self.assertMessageEqual(1, EXAMPLE_JSON_STREAM_EVENTS[1]) + self.assertMessageEqual(0, EXAMPLE_JSON_STREAM_EVENTS[0], use_packer='json') + self.assertMessageEqual(1, EXAMPLE_JSON_STREAM_EVENTS[1], use_packer='json') def dump_message(self, error_traceback, message=None): self.assertDictEqual(JSON_STREAM_BOGUS_DUMP[self.call_counter], message) diff --git a/intelmq/tests/lib/test_pipeline.py b/intelmq/tests/lib/test_pipeline.py index 39f75eb0fb..f0d46c116e 100644 --- a/intelmq/tests/lib/test_pipeline.py +++ b/intelmq/tests/lib/test_pipeline.py @@ -20,6 +20,7 @@ import intelmq.lib.pipeline as pipeline import intelmq.lib.test as test import intelmq.lib.exceptions as exceptions +import intelmq.lib.utils as utils SAMPLES = {'normal': [b'Lorem ipsum dolor sit amet', 'Lorem ipsum dolor sit amet'], @@ -67,7 +68,7 @@ def 
setUp(self): def test_receive(self): self.pipe.state['test-bot-input'] = [SAMPLES['normal'][0]] - self.assertEqual(SAMPLES['normal'][1], self.pipe.receive()) + self.assertEqual(SAMPLES['normal'][0], self.pipe.receive()) def test_send(self): self.pipe.send(SAMPLES['normal'][1]) @@ -76,7 +77,7 @@ def test_send(self): def test_receive_unicode(self): self.pipe.state['test-bot-input'] = [SAMPLES['unicode'][0]] - self.assertEqual(SAMPLES['unicode'][1], self.pipe.receive()) + self.assertEqual(SAMPLES['unicode'][0], self.pipe.receive()) def test_send_unicode(self): self.pipe.send(SAMPLES['unicode'][1]) @@ -161,12 +162,12 @@ def test_send_receive(self): """ Sending bytest and receiving unicode. """ self.clear() self.pipe.send(SAMPLES['normal'][0]) - self.assertEqual(SAMPLES['normal'][1], self.pipe.receive()) + self.assertEqual(SAMPLES['normal'][1], utils.decode(self.pipe.receive())) def test_send_receive_unicode(self): self.clear() self.pipe.send(SAMPLES['unicode'][1]) - self.assertEqual(SAMPLES['unicode'][1], self.pipe.receive()) + self.assertEqual(SAMPLES['unicode'][1], utils.decode(self.pipe.receive())) def test_count(self): self.clear() @@ -185,7 +186,7 @@ def test_reject(self): self.pipe.send(SAMPLES['normal'][0]) self.pipe.receive() self.pipe.reject_message() - self.assertEqual(SAMPLES['normal'][1], self.pipe.receive()) + self.assertEqual(SAMPLES['normal'][1], utils.decode(self.pipe.receive())) def test_acknowledge(self): self.pipe.send(SAMPLES['normal'][0]) diff --git a/setup.py b/setup.py index 1611d8c46d..47df402eef 100644 --- a/setup.py +++ b/setup.py @@ -20,6 +20,7 @@ 'redis>=2.10', 'requests>=2.2.0', 'ruamel.yaml', + 'msgpack>=0.5', ] exec(open(os.path.join(os.path.dirname(__file__),