From d4f835c687c9727aed1e738b815e0331444ead61 Mon Sep 17 00:00:00 2001 From: Kyle Maxwell Date: Thu, 8 Jan 2015 17:59:22 -0600 Subject: [PATCH 01/14] Do not make DNSDB query if the API key is still default --- winnower.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/winnower.py b/winnower.py index 60a30b7..b885809 100755 --- a/winnower.py +++ b/winnower.py @@ -132,7 +132,7 @@ def winnow(in_file, out_file, enr_file): # handle the case where we aren't using DNSDB dnsdb = dnsdb_query.DnsdbClient(server, api) - if len(dnsdb.query_rdata_name('google.com')) == 0: + if api == 'YOUR_API_KEY_HERE' or len(dnsdb.query_rdata_name('google.com')) == 0: dnsdb = None logger.info('Invalid DNSDB configuration found') From 781b2cc89bf85d2bed8cead7f01f0b55c03e1dd3 Mon Sep 17 00:00:00 2001 From: Kyle Maxwell Date: Thu, 8 Jan 2015 17:59:22 -0600 Subject: [PATCH 02/14] Do not make DNSDB query if the API key is still default --- winnower.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/winnower.py b/winnower.py index 60a30b7..b885809 100755 --- a/winnower.py +++ b/winnower.py @@ -132,7 +132,7 @@ def winnow(in_file, out_file, enr_file): # handle the case where we aren't using DNSDB dnsdb = dnsdb_query.DnsdbClient(server, api) - if len(dnsdb.query_rdata_name('google.com')) == 0: + if api == 'YOUR_API_KEY_HERE' or len(dnsdb.query_rdata_name('google.com')) == 0: dnsdb = None logger.info('Invalid DNSDB configuration found') From d1136d6dd1c38599525841918c741931cead250b Mon Sep 17 00:00:00 2001 From: Kyle Maxwell Date: Wed, 14 Jan 2015 13:14:50 -0600 Subject: [PATCH 03/14] Complete enrichment of FQDN --- winnower.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/winnower.py b/winnower.py index b885809..3fe4e47 100755 --- a/winnower.py +++ b/winnower.py @@ -20,6 +20,7 @@ reserved_ranges = IPSet(['0.0.0.0/8', '100.64.0.0/10', '127.0.0.0/8', '192.88.99.0/24', '198.18.0.0/15', '198.51.100.0/24', '203.0.113.0/24', '233.252.0.0/24']) gi_org = SortedDict() +geo_data = None def load_gi_org(filename): @@ -52,14 +53,14 @@ def maxhits(dns_records): return hostname -def enrich_IPv4(address, geo_data, dnsdb=None): +def enrich_IPv4(address, dnsdb=None, rhost=None): as_num, as_name = org_by_addr(address) country = geo_data.country_code_by_addr('%s' % address) if dnsdb: hostname = maxhits(dnsdb.query_rdata_ip('%s' % address)) else: hostname = None - return (as_num, as_name, country, None, hostname) + return (as_num, as_name, country, rhost, hostname) def enrich_FQDN(address, date, dnsdb): @@ -68,6 +69,7 @@ def enrich_FQDN(address, date, dnsdb): ip_addr = maxhits(records) if ip_addr: logger.info('Mapped %s to %s' % (address, ip_addr)) + ip_addr = enrich_IPv4(ip_addr, dnsdb, address) return ip_addr @@ -156,10 +158,10 @@ def winnow(in_file, out_file, enr_file): if not reserved(ipaddr): wheat.append(each) if enrich_ip: - e_data = (addr, addr_type, direction, source, note, date) + enrich_IPv4(ipaddr, geo_data, dnsdb) + e_data = (addr, addr_type, direction, source, note, date) + enrich_IPv4(ipaddr, dnsdb) enriched.append(e_data) else: - e_data = (addr, addr_type, direction, source, note, date) + enrich_IPv4(ipaddr, geo_data) + e_data = (addr, addr_type, direction, source, note, date) + enrich_IPv4(ipaddr) enriched.append(e_data) else: logger.error('Found invalid address: %s from: %s' % (addr, source)) From 4a47265892eec0313b9f0b4af9d2b9fc7db4815a Mon Sep 17 00:00:00 2001 From: Kyle Maxwell Date: Wed, 14 Jan 2015 14:36:07 -0600 Subject: [PATCH 04/14] Read geoip data globally and add debug printing --- winnower.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/winnower.py b/winnower.py index 3fe4e47..00fbf03 100755 --- a/winnower.py +++ b/winnower.py @@ -20,7 +20,7 @@ reserved_ranges = IPSet(['0.0.0.0/8', '100.64.0.0/10', '127.0.0.0/8', '192.88.99.0/24', '198.18.0.0/15', '198.51.100.0/24', '203.0.113.0/24', '233.252.0.0/24']) gi_org = SortedDict() -geo_data = None +geo_data = pygeoip.GeoIP('data/GeoIP.dat', pygeoip.MEMORY_CACHE) def load_gi_org(filename): @@ -144,7 +144,6 @@ def winnow(in_file, out_file, enr_file): # TODO: make these locations configurable? logger.info('Loading GeoIP data') gi_org = load_gi_org('data/GeoIPASNum2.csv') - geo_data = pygeoip.GeoIP('data/GeoIP.dat', pygeoip.MEMORY_CACHE) wheat = [] enriched = [] @@ -152,12 +151,14 @@ def winnow(in_file, out_file, enr_file): logger.info('Beginning winnowing process') for each in crop: (addr, addr_type, direction, source, note, date) = each + # this should be refactored into appropriate functions if addr_type == 'IPv4' and is_ipv4(addr): #logger.info('Enriching %s' % addr) ipaddr = IPAddress(addr) if not reserved(ipaddr): wheat.append(each) if enrich_ip: + print "Enriching %s" % addr e_data = (addr, addr_type, direction, source, note, date) + enrich_IPv4(ipaddr, dnsdb) enriched.append(e_data) else: @@ -169,6 +170,7 @@ def winnow(in_file, out_file, enr_file): #logger.info('Enriching %s' % addr) wheat.append(each) if enrich_dns and dnsdb: + print "Enriching %s" % addr e_data = (addr, addr_type, direction, source, note, date, enrich_FQDN(addr, date, dnsdb)) enriched.append(e_data) else: From 317aaa459bca648e18c4d99195abaa120a844b83 Mon Sep 17 00:00:00 2001 From: Kyle Maxwell Date: Wed, 11 Mar 2015 13:17:22 -0500 Subject: [PATCH 05/14] Filter from one day in advance --- winnower.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/winnower.py b/winnower.py index 00fbf03..b6bb09d 100755 --- a/winnower.py +++ b/winnower.py @@ -65,7 +65,9 @@ def enrich_IPv4(address, dnsdb=None, rhost=None): def enrich_FQDN(address, date, dnsdb): records = dnsdb.query_rrset(address, rrtype='A') - records = filter_date(records, date) + yesterday = dt.datetime.strptime(date, '%Y-%m-%d') - dt.timedelta(days=1) + yesterday_str = yesterday.strftime('%Y-%m-%d') + records = filter_date(records, yesterday) ip_addr = maxhits(records) if ip_addr: logger.info('Mapped %s to %s' % (address, ip_addr)) @@ -158,7 +160,6 @@ def winnow(in_file, out_file, enr_file): if not reserved(ipaddr): wheat.append(each) if enrich_ip: - print "Enriching %s" % addr e_data = (addr, addr_type, direction, source, note, date) + enrich_IPv4(ipaddr, dnsdb) enriched.append(e_data) else: From 319a3f0abed21d973fad9de4329978da32895a65 Mon Sep 17 00:00:00 2001 From: Kyle Maxwell Date: Tue, 17 Mar 2015 13:41:23 -0500 Subject: [PATCH 06/14] Remove unused imports --- winnower.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/winnower.py b/winnower.py index b6bb09d..3496f9a 100755 --- a/winnower.py +++ b/winnower.py @@ -6,13 +6,11 @@ import json import pygeoip import re -import sys from netaddr import IPAddress, IPRange, IPSet from sortedcontainers import SortedDict from logger import get_logger -import logging logger = get_logger('winnower') From c5cb87662a8ce568418272467e861d178636895d Mon Sep 17 00:00:00 2001 From: Kyle Maxwell Date: Tue, 17 Mar 2015 13:42:10 -0500 Subject: [PATCH 07/14] Rename variable to avoid keyword conflict --- winnower.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/winnower.py b/winnower.py index 3496f9a..ff6bc30 100755 --- a/winnower.py +++ b/winnower.py @@ -42,11 +42,11 @@ def org_by_addr(address): def maxhits(dns_records): - max = 0 + hmax = 0 hostname = None for record in dns_records: - if record['count'] > max: - max = record['count'] + if record['count'] > hmax: + hmax = record['count'] hostname = record['rrname'].rstrip('.') return hostname From ace40b17ba446abc9c6004f81445c99fe24aae8a Mon Sep 17 00:00:00 2001 From: Kyle Maxwell Date: Tue, 17 Mar 2015 14:16:43 -0500 Subject: [PATCH 08/14] Correctly find IP address for name --- winnower.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/winnower.py b/winnower.py index ff6bc30..eff2083 100755 --- a/winnower.py +++ b/winnower.py @@ -45,12 +45,24 @@ def maxhits(dns_records): hmax = 0 hostname = None for record in dns_records: + #logger.info("examining %s" % record) if record['count'] > hmax: hmax = record['count'] hostname = record['rrname'].rstrip('.') return hostname +def maxhits_rdata(dns_records): + hmax = 0 + hostname = None + for record in dns_records: + logger.info("examining %s" % record) + if record['count'] > hmax: + hmax = record['count'] + hostname = record['rdata'][0].rstrip('.') + return hostname + + def enrich_IPv4(address, dnsdb=None, rhost=None): as_num, as_name = org_by_addr(address) country = geo_data.country_code_by_addr('%s' % address) @@ -65,11 +77,11 @@ def enrich_FQDN(address, date, dnsdb): records = dnsdb.query_rrset(address, rrtype='A') yesterday = dt.datetime.strptime(date, '%Y-%m-%d') - dt.timedelta(days=1) yesterday_str = yesterday.strftime('%Y-%m-%d') - records = filter_date(records, yesterday) - ip_addr = maxhits(records) + records = filter_date(records, yesterday_str) + ip_addr = maxhits_rdata(records) if ip_addr: - logger.info('Mapped %s to %s' % (address, ip_addr)) - ip_addr = enrich_IPv4(ip_addr, dnsdb, address) + logger.info('Mapped %s to %s on %s' % (address, ip_addr, date)) + ip_addr = enrich_IPv4(IPAddress(ip_addr), dnsdb, address) return ip_addr From 8d7d221d5096b8dcfdc2547543959243f689f448 Mon Sep 17 00:00:00 2001 From: Kyle Maxwell Date: Tue, 17 Mar 2015 14:56:22 -0500 Subject: [PATCH 09/14] Add IP enrichment to final set --- winnower.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/winnower.py b/winnower.py index eff2083..3f23477 100755 --- a/winnower.py +++ b/winnower.py @@ -56,7 +56,7 @@ def maxhits_rdata(dns_records): hmax = 0 hostname = None for record in dns_records: - logger.info("examining %s" % record) + # logger.info("Examining %s" % record) if record['count'] > hmax: hmax = record['count'] hostname = record['rdata'][0].rstrip('.') @@ -80,9 +80,9 @@ def enrich_FQDN(address, date, dnsdb): records = filter_date(records, yesterday_str) ip_addr = maxhits_rdata(records) if ip_addr: - logger.info('Mapped %s to %s on %s' % (address, ip_addr, date)) - ip_addr = enrich_IPv4(IPAddress(ip_addr), dnsdb, address) - return ip_addr + # logger.info('Mapped %s to %s on %s' % (address, ip_addr, date)) + ip_addr_data = enrich_IPv4(IPAddress(ip_addr), dnsdb, address) + return (ip_addr,) + ip_addr_data def filter_date(records, date): @@ -182,7 +182,8 @@ def winnow(in_file, out_file, enr_file): wheat.append(each) if enrich_dns and dnsdb: print "Enriching %s" % addr - e_data = (addr, addr_type, direction, source, note, date, enrich_FQDN(addr, date, dnsdb)) + e_data = enrich_FQDN(addr, date, dnsdb) + e_data = (e_data[0], "IPv4", direction, source, note, date, e_data[1:]) enriched.append(e_data) else: logger.error('Could not determine address type for %s listed as %s' % (addr, addr_type)) From ba6a6c887b8c801163fed3b6d464f3ec35e9458d Mon Sep 17 00:00:00 2001 From: Alexandre Pinto Date: Fri, 3 Apr 2015 11:01:26 -0700 Subject: [PATCH 10/14] Some bugfixes on DNS enrichment code in winnower (#36) --- winnower.py | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/winnower.py b/winnower.py index 3f23477..61f1af1 100755 --- a/winnower.py +++ b/winnower.py @@ -63,14 +63,14 @@ def maxhits_rdata(dns_records): return hostname -def enrich_IPv4(address, dnsdb=None, rhost=None): +def enrich_IPv4(address, dnsdb=None, hostname=None): as_num, as_name = org_by_addr(address) country = geo_data.country_code_by_addr('%s' % address) if dnsdb: - hostname = maxhits(dnsdb.query_rdata_ip('%s' % address)) + rhost = maxhits(dnsdb.query_rdata_ip('%s' % address)) else: - hostname = None - return (as_num, as_name, country, rhost, hostname) + rhost = None + return (as_num, as_name, country, hostname, rhost) def enrich_FQDN(address, date, dnsdb): @@ -82,7 +82,9 @@ def enrich_FQDN(address, date, dnsdb): if ip_addr: # logger.info('Mapped %s to %s on %s' % (address, ip_addr, date)) ip_addr_data = enrich_IPv4(IPAddress(ip_addr), dnsdb, address) - return (ip_addr,) + ip_addr_data + return (ip_addr,) + ip_addr_data + else: + return None def filter_date(records, date): @@ -127,7 +129,7 @@ def winnow(in_file, out_file, enr_file): server = config.get('Winnower', 'dnsdb_server') api = config.get('Winnower', 'dnsdb_api') enrich_ip = config.get('Winnower', 'enrich_ip') - if enrich_ip == '1': + if enrich_ip == '1' or enrich_ip == 'True': enrich_ip = True logger.info('Enriching IPv4 indicators: TRUE') else: @@ -135,7 +137,7 @@ def winnow(in_file, out_file, enr_file): logger.info('Enriching IPv4 indicators: FALSE') enrich_dns = config.get('Winnower', 'enrich_dns') - if enrich_dns == '1': + if enrich_dns == '1' or enrich_dns == 'True': enrich_dns = True logger.info('Enriching DNS indicators: TRUE') else: @@ -181,10 +183,11 @@ def winnow(in_file, out_file, enr_file): #logger.info('Enriching %s' % addr) wheat.append(each) if enrich_dns and dnsdb: - print "Enriching %s" % addr + # print "Enriching %s" % addr e_data = enrich_FQDN(addr, date, dnsdb) - e_data = (e_data[0], "IPv4", direction, source, note, date, e_data[1:]) - enriched.append(e_data) + if e_data: + e_data = (e_data[0], "IPv4", direction, source, note, date) + e_data[1:] + enriched.append(e_data) else: logger.error('Could not determine address type for %s listed as %s' % (addr, addr_type)) From ed398a63bfd23db9cebc925817b41edaa3c4629d Mon Sep 17 00:00:00 2001 From: Kyle Maxwell Date: Fri, 3 Apr 2015 14:39:20 -0500 Subject: [PATCH 11/14] Look up PTR for in-addr.arpa record instead --- winnower.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/winnower.py b/winnower.py index 61f1af1..6474908 100755 --- a/winnower.py +++ b/winnower.py @@ -6,6 +6,7 @@ import json import pygeoip import re +import sys from netaddr import IPAddress, IPRange, IPSet from sortedcontainers import SortedDict @@ -67,7 +68,8 @@ def enrich_IPv4(address, dnsdb=None, hostname=None): as_num, as_name = org_by_addr(address) country = geo_data.country_code_by_addr('%s' % address) if dnsdb: - rhost = maxhits(dnsdb.query_rdata_ip('%s' % address)) + inaddr = address.reverse_dns + rhost = maxhits(dnsdb.query_rrset('%s' % inaddr)) else: rhost = None return (as_num, as_name, country, hostname, rhost) From 3a8964887a1bae19e0a2bf4aa1d7fbf375b138e2 Mon Sep 17 00:00:00 2001 From: Kyle Maxwell Date: Fri, 10 Apr 2015 21:37:03 +0000 Subject: [PATCH 12/14] Return correct DNS enrichments --- winnower.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/winnower.py b/winnower.py index 6474908..9fd5688 100755 --- a/winnower.py +++ b/winnower.py @@ -80,13 +80,13 @@ def enrich_FQDN(address, date, dnsdb): yesterday = dt.datetime.strptime(date, '%Y-%m-%d') - dt.timedelta(days=1) yesterday_str = yesterday.strftime('%Y-%m-%d') records = filter_date(records, yesterday_str) - ip_addr = maxhits_rdata(records) - if ip_addr: - # logger.info('Mapped %s to %s on %s' % (address, ip_addr, date)) - ip_addr_data = enrich_IPv4(IPAddress(ip_addr), dnsdb, address) - return (ip_addr,) + ip_addr_data - else: + enrichment = [] + if not records: return None + for ip_addr in records[0]['rdata']: + ip_addr_data = enrich_IPv4(IPAddress(ip_addr), dnsdb, address) + enrichment.append((ip_addr,) + ip_addr_data) + return enrichment def filter_date(records, date): @@ -188,8 +188,9 @@ def winnow(in_file, out_file, enr_file): # print "Enriching %s" % addr e_data = enrich_FQDN(addr, date, dnsdb) if e_data: - e_data = (e_data[0], "IPv4", direction, source, note, date) + e_data[1:] - enriched.append(e_data) + for each in e_data: + datum = (each[0], "IPv4", direction, source, note, date) + each[1:] + enriched.append(datum) else: logger.error('Could not determine address type for %s listed as %s' % (addr, addr_type)) From 94feb2974ee4ced63766a72c7c3f10a851db297f Mon Sep 17 00:00:00 2001 From: Alexandre Pinto Date: Sat, 25 Apr 2015 18:12:52 -0700 Subject: [PATCH 13/14] Fixing rhost query so it returns the rdata, not the rrset (#36) --- winnower.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/winnower.py b/winnower.py index 9fd5688..774f145 100755 --- a/winnower.py +++ b/winnower.py @@ -69,7 +69,7 @@ def enrich_IPv4(address, dnsdb=None, hostname=None): country = geo_data.country_code_by_addr('%s' % address) if dnsdb: inaddr = address.reverse_dns - rhost = maxhits(dnsdb.query_rrset('%s' % inaddr)) + rhost = maxhits_rdata(dnsdb.query_rrset('%s' % inaddr)) else: rhost = None return (as_num, as_name, country, hostname, rhost) From 239f0c32f0cae0770f8d53fe7ac06b845129b0d9 Mon Sep 17 00:00:00 2001 From: Alexandre Pinto Date: Sat, 25 Apr 2015 18:17:13 -0700 Subject: [PATCH 14/14] Updating CHANGELOG (#36) --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index d6f2957..3945fa9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ Bugfix release, and also making it easier to install and use the system with Ven * Docker file and usage guide ([#117](https://github.com/mlsecproject/combine/issues/117)) * Instalation documentation using Python venv ([#115](https://github.com/mlsecproject/combine/issues/115)) * Minor cleanup on gitignore and other files ([#109](https://github.com/mlsecproject/combine/issues/109)) +* Correct enrichment of FQDN indicators - it extracts all the related IPv4s and enriches them further ([#36](https://github.com/mlsecproject/combine/issues/36)) #### 0.1.2 Bouncing Capybara This is a bugfix release with several stability and performance improvements