From debd147c5aed15a97a6e5840e8433879c6b71359 Mon Sep 17 00:00:00 2001 From: Adrian Moen Date: Fri, 13 Sep 2019 10:51:47 +0200 Subject: [PATCH 1/4] Improved parser to better handle description --- intelmq/bots/parsers/bambenek/parser.py | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/intelmq/bots/parsers/bambenek/parser.py b/intelmq/bots/parsers/bambenek/parser.py index 3c5370f469..4c4e58a2e5 100644 --- a/intelmq/bots/parsers/bambenek/parser.py +++ b/intelmq/bots/parsers/bambenek/parser.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- """ IntelMQ parser for Bambenek DGA, Domain, and IP feeds """ +import re from intelmq.lib.bot import ParserBot @@ -32,32 +33,34 @@ def parse_line(self, line, report): self.tempdata.append(line) else: - value = line.split(',') + m = re.match(r"(?P\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}),(?P.*),(?P\d{4}-\d{2}-\d{2}[ ]\d{2}[:]\d{2}),(?P.*)", line) + values = m.groups() + event = self.new_event(report) - event.add('event_description.text', value[1]) - event.add('event_description.url', value[3]) + event.add('event_description.text', values[1]) + event.add('event_description.url', values[3]) event.add('raw', line) # last column is a url with malware named txt file link - malware_name = value[-1].split('/')[-1].split('.')[0] + malware_name = values[-1].split('/')[-1].split('.')[0] event.add('malware.name', self.MALWARE_NAME_MAP.get(malware_name, malware_name)) if report['feed.url'] in BambenekParserBot.IPMASTERLIST: - event.add('source.ip', value[0]) - event.add('time.source', value[2] + ' UTC') + event.add('source.ip', values[0]) + event.add('time.source', values[2] + ' UTC') event.add('classification.type', 'c2server') event.add('status', 'online') elif report['feed.url'] in BambenekParserBot.DOMMASTERLIST: - event.add('source.fqdn', value[0]) - event.add('time.source', value[2] + ' UTC') + event.add('source.fqdn', values[0]) + event.add('time.source', values[2] + ' UTC') event.add('classification.type', 'c2server') event.add('status', 'online') elif report['feed.url'] in BambenekParserBot.DGA_FEED: - event.add('source.fqdn', value[0]) - event.add('time.source', value[2] + ' 00:00 UTC') + event.add('source.fqdn', values[0]) + event.add('time.source', values[2] + ' 00:00 UTC') event.add('classification.type', 'dga domain') else: From e5b4372b243b36bc9ca1a2595faad9a578888227 Mon Sep 17 00:00:00 2001 From: Adrian Moen Date: Fri, 13 Sep 2019 12:20:37 +0200 Subject: [PATCH 2/4] Removed whitespace and broke down regex expression --- intelmq/bots/parsers/bambenek/parser.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/intelmq/bots/parsers/bambenek/parser.py b/intelmq/bots/parsers/bambenek/parser.py index 4c4e58a2e5..160e0bd646 100644 --- a/intelmq/bots/parsers/bambenek/parser.py +++ b/intelmq/bots/parsers/bambenek/parser.py @@ -33,11 +33,10 @@ def parse_line(self, line, report): self.tempdata.append(line) else: - m = re.match(r"(?P\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}),(?P.*),(?P\d{4}-\d{2}-\d{2}[ ]\d{2}[:]\d{2}),(?P.*)", line) + m = re.match(r"(?P\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}),(?P.*), \ + (?P\d{4}-\d{2}-\d{2}[ ]\d{2}[:]\d{2}),(?P.*)", line) values = m.groups() - event = self.new_event(report) - event.add('event_description.text', values[1]) event.add('event_description.url', values[3]) event.add('raw', line) From 56021be2455b1e88780e421fc6d8e210776416d3 Mon Sep 17 00:00:00 2001 From: Sebastian Wagner Date: Wed, 18 Sep 2019 16:18:16 +0200 Subject: [PATCH 3/4] TST: add bambenek tests for commas in descriptions see certtools/intelmq#1451 --- intelmq/bots/parsers/bambenek/parser.py | 4 ++-- .../bots/parsers/bambenek/c2-ipmasterlist.txt | 15 ++++++------- .../bots/parsers/bambenek/test_parser.py | 21 ++++++++++++++++--- 3 files changed, 28 insertions(+), 12 deletions(-) diff --git a/intelmq/bots/parsers/bambenek/parser.py b/intelmq/bots/parsers/bambenek/parser.py index 160e0bd646..2784dfcdf1 100644 --- a/intelmq/bots/parsers/bambenek/parser.py +++ b/intelmq/bots/parsers/bambenek/parser.py @@ -33,8 +33,8 @@ def parse_line(self, line, report): self.tempdata.append(line) else: - m = re.match(r"(?P\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}),(?P.*), \ - (?P\d{4}-\d{2}-\d{2}[ ]\d{2}[:]\d{2}),(?P.*)", line) + m = re.match(r"(?P\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}),(?P.*), " + r"(?P\d{4}-\d{2}-\d{2}[ ]\d{2}[:]\d{2}),(?P.*)", line) values = m.groups() event = self.new_event(report) event.add('event_description.text', values[1]) diff --git a/intelmq/tests/bots/parsers/bambenek/c2-ipmasterlist.txt b/intelmq/tests/bots/parsers/bambenek/c2-ipmasterlist.txt index 0432f6da64..d6693e7ab4 100644 --- a/intelmq/tests/bots/parsers/bambenek/c2-ipmasterlist.txt +++ b/intelmq/tests/bots/parsers/bambenek/c2-ipmasterlist.txt @@ -1,17 +1,18 @@ ############################################################# -## Master Feed of known, active and non-sinkholed C&Cs IP +## Master Feed of known, active and non-sinkholed C&Cs IP ## addresses -## -## Feed generated at: 2016-11-12 18:17 +## +## Feed generated at: 2016-11-12 18:17 ## ## Feed Provided By: John Bambenek of Bambenek Consulting ## jcb@bambenekconsulting.com // http://bambenekconsulting.com -## Use of this feed is governed by the license here: -## http://osint.bambenekconsulting.com/license.txt +## Use of this feed is governed by the license here: +## http://osint.bambenekconsulting.com/license.txt ## -## For more information on this feed go to: +## For more information on this feed go to: ## http://osint.bambenekconsulting.com/manual/c2-ipmasterlist.txt -## +## ## All times are in UTC ############################################################# 213.247.47.190,IP used by shiotob/urlzone/bebloh C&C,2016-11-12 18:02,http://osint.bambenekconsulting.com/manual/bebloh.txt +64.183.187.20,IP resolved by necurs C&C, uses encoded IP, this is not the C2 IP, 2019-09-17 06:06,http://osint.bambenekconsulting.com/manual/necurs.txt diff --git a/intelmq/tests/bots/parsers/bambenek/test_parser.py b/intelmq/tests/bots/parsers/bambenek/test_parser.py index 01038971db..4635e6ae7f 100644 --- a/intelmq/tests/bots/parsers/bambenek/test_parser.py +++ b/intelmq/tests/bots/parsers/bambenek/test_parser.py @@ -45,7 +45,7 @@ 'time.observation': '2016-01-01T00:00:00+00:00' } -IP_EVENTS = {'feed.url': 'http://osint.bambenekconsulting.com/feeds/c2-ipmasterlist.txt', +IP_EVENTS = [{'feed.url': 'http://osint.bambenekconsulting.com/feeds/c2-ipmasterlist.txt', 'feed.name': 'Bambenek C2 IP Feed', '__type': 'Event', 'time.observation': '2016-01-01T00:00:00+00:00', @@ -57,7 +57,21 @@ 'time.source': '2016-11-12T18:02:00+00:00', 'event_description.text': 'IP used by shiotob/urlzone/bebloh C&C', 'event_description.url': 'http://osint.bambenekconsulting.com/manual/bebloh.txt' - } + }, + {'feed.url': 'http://osint.bambenekconsulting.com/feeds/c2-ipmasterlist.txt', + 'feed.name': 'Bambenek C2 IP Feed', + '__type': 'Event', + 'time.observation': '2016-01-01T00:00:00+00:00', + 'raw': 'NjQuMTgzLjE4Ny4yMCxJUCByZXNvbHZlZCBieSBuZWN1cnMgQyZDLCB1c2VzIGVuY29kZWQgSVAsIHRoaXMgaXMgbm90IHRoZSBDMiBJUCwgMjAxOS0wOS0xNyAwNjowNixodHRwOi8vb3NpbnQuYmFtYmVuZWtjb25zdWx0aW5nLmNvbS9tYW51YWwvbmVjdXJzLnR4dAo=', + 'source.ip': '64.183.187.20', + 'malware.name': 'necurs', + 'classification.type': 'c2server', + 'status': 'online', + 'time.source': '2019-09-17T06:06:00+00:00', + 'event_description.text': 'IP resolved by necurs C&C, uses encoded IP, this is not the C2 IP', + 'event_description.url': 'http://osint.bambenekconsulting.com/manual/necurs.txt' + }, + ] DGA_REPORT = {'feed.url': 'http://osint.bambenekconsulting.com/feeds/dga-feed.txt', 'feed.name': 'Bambenek DGA Domain Feed', @@ -95,7 +109,8 @@ def test_domain(self): def test_ip(self): self.input_message = IP_REPORT self.run_bot() - self.assertMessageEqual(0, IP_EVENTS) + self.assertMessageEqual(0, IP_EVENTS[0]) + self.assertMessageEqual(1, IP_EVENTS[1]) def test_dga(self): self.input_message = DGA_REPORT From f8dfa9b776666051f15aa7e30ba7d0a85d14fb48 Mon Sep 17 00:00:00 2001 From: Adrian Moen Date: Thu, 19 Sep 2019 10:04:28 +0200 Subject: [PATCH 4/4] Corrected line-break in regex expression --- intelmq/bots/parsers/bambenek/parser.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/intelmq/bots/parsers/bambenek/parser.py b/intelmq/bots/parsers/bambenek/parser.py index 160e0bd646..2784dfcdf1 100644 --- a/intelmq/bots/parsers/bambenek/parser.py +++ b/intelmq/bots/parsers/bambenek/parser.py @@ -33,8 +33,8 @@ def parse_line(self, line, report): self.tempdata.append(line) else: - m = re.match(r"(?P\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}),(?P.*), \ - (?P\d{4}-\d{2}-\d{2}[ ]\d{2}[:]\d{2}),(?P.*)", line) + m = re.match(r"(?P\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}),(?P.*), " + r"(?P\d{4}-\d{2}-\d{2}[ ]\d{2}[:]\d{2}),(?P.*)", line) values = m.groups() event = self.new_event(report) event.add('event_description.text', values[1])