Skip to content

Commit

Permalink
Merging Changelog
Browse files Browse the repository at this point in the history
  • Loading branch information
alexcpsec committed Apr 27, 2015
2 parents 0197f89 + 9bc3648 commit a2dcb7e
Show file tree
Hide file tree
Showing 4 changed files with 44 additions and 24 deletions.
1 change: 0 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ Bugfix release, and also making it easier to install and use the system with Ven
* Correct enrichment of FQDN indicators - it extracts all the related IPv4s and enriches them further ([#36](https://github.com/mlsecproject/combine/issues/36))
* Added contributing document to repository ([#127](https://github.com/mlsecproject/combine/issues/127))


#### 0.1.2 Bouncing Capybara
This is a bugfix release with several stability and performance improvements

Expand Down
1 change: 0 additions & 1 deletion inbound_urls.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,4 +26,3 @@ http://www.autoshun.org/files/shunlist.csv
http://charles.the-haleys.org/ssh_dico_attack_hdeny_format.php/hostsdeny.txt
http://virbl.org/download/virbl.dnsbl.bit.nl.txt
http://botscout.com/last_caught_cache.htm
file:///path/to/your/inbound_file
2 changes: 1 addition & 1 deletion outbound_urls.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,4 @@ http://reputation.alienvault.com/reputation.data
http://www.nothink.org/blacklist/blacklist_malware_dns.txt
http://www.nothink.org/blacklist/blacklist_malware_http.txt
http://www.nothink.org/blacklist/blacklist_malware_irc.txt
file:///path/to/your/outbound_file
https://feodotracker.abuse.ch/blocklist/?download=ipblocklist
64 changes: 43 additions & 21 deletions winnower.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,14 @@
from sortedcontainers import SortedDict

from logger import get_logger
import logging

logger = get_logger('winnower')

# from http://en.wikipedia.org/wiki/Reserved_IP_addresses:
reserved_ranges = IPSet(['0.0.0.0/8', '100.64.0.0/10', '127.0.0.0/8', '192.88.99.0/24',
'198.18.0.0/15', '198.51.100.0/24', '203.0.113.0/24', '233.252.0.0/24'])
gi_org = SortedDict()
geo_data = pygeoip.GeoIP('data/GeoIP.dat', pygeoip.MEMORY_CACHE)


def load_gi_org(filename):
Expand All @@ -43,32 +43,50 @@ def org_by_addr(address):


def maxhits(dns_records):
max = 0
hmax = 0
hostname = None
for record in dns_records:
if record['count'] > max:
max = record['count']
#logger.info("examining %s" % record)
if record['count'] > hmax:
hmax = record['count']
hostname = record['rrname'].rstrip('.')
return hostname


def enrich_IPv4(address, geo_data, dnsdb=None):
def maxhits_rdata(dns_records):
    """Return the first rdata entry of the record with the highest count.

    Each record is a mapping with a numeric 'count' and a sequence 'rdata'
    whose first element is a string (presumably DNSDB rrset results; verify
    against the caller). The trailing '.' of the chosen entry is stripped.

    Ties keep the earliest record, because a later record must have a
    strictly greater count to replace the current best. Records whose count
    is not above zero are never selected.

    Returns None when dns_records is None or empty, or when no record has
    both a positive count and a non-empty rdata.
    """
    hmax = 0
    hostname = None
    # Treat a missing result set (None) the same as an empty one.
    for record in dns_records or ():
        count = record['count']
        if not count > hmax:
            continue
        rdata = record['rdata']
        # A record with no rdata has nothing to report; skipping it avoids
        # an IndexError and lets a lower-count record with data win instead.
        if not rdata:
            continue
        hmax = count
        hostname = rdata[0].rstrip('.')
    return hostname


def enrich_IPv4(address, dnsdb=None, hostname=None):
as_num, as_name = org_by_addr(address)
country = geo_data.country_code_by_addr('%s' % address)
if dnsdb:
hostname = maxhits(dnsdb.query_rdata_ip('%s' % address))
inaddr = address.reverse_dns
rhost = maxhits_rdata(dnsdb.query_rrset('%s' % inaddr))
else:
hostname = None
return (as_num, as_name, country, None, hostname)
rhost = None
return (as_num, as_name, country, hostname, rhost)


def enrich_FQDN(address, date, dnsdb):
records = dnsdb.query_rrset(address, rrtype='A')
records = filter_date(records, date)
ip_addr = maxhits(records)
if ip_addr:
logger.info('Mapped %s to %s' % (address, ip_addr))
return ip_addr
yesterday = dt.datetime.strptime(date, '%Y-%m-%d') - dt.timedelta(days=1)
yesterday_str = yesterday.strftime('%Y-%m-%d')
records = filter_date(records, yesterday_str)
enrichment = []
if not records:
return None
for ip_addr in records[0]['rdata']:
ip_addr_data = enrich_IPv4(IPAddress(ip_addr), dnsdb, address)
enrichment.append((ip_addr,) + ip_addr_data)
return enrichment


def filter_date(records, date):
Expand Down Expand Up @@ -113,15 +131,15 @@ def winnow(in_file, out_file, enr_file):
server = config.get('Winnower', 'dnsdb_server')
api = config.get('Winnower', 'dnsdb_api')
enrich_ip = config.get('Winnower', 'enrich_ip')
if enrich_ip == '1':
if enrich_ip == '1' or enrich_ip == 'True':
enrich_ip = True
logger.info('Enriching IPv4 indicators: TRUE')
else:
enrich_ip = False
logger.info('Enriching IPv4 indicators: FALSE')

enrich_dns = config.get('Winnower', 'enrich_dns')
if enrich_dns == '1':
if enrich_dns == '1' or enrich_dns == 'True':
enrich_dns = True
logger.info('Enriching DNS indicators: TRUE')
else:
Expand All @@ -132,7 +150,7 @@ def winnow(in_file, out_file, enr_file):

# handle the case where we aren't using DNSDB
dnsdb = dnsdb_query.DnsdbClient(server, api)
if len(dnsdb.query_rdata_name('google.com')) == 0:
if api == 'YOUR_API_KEY_HERE' or len(dnsdb.query_rdata_name('google.com')) == 0:
dnsdb = None
logger.info('Invalid DNSDB configuration found')

Expand All @@ -142,33 +160,37 @@ def winnow(in_file, out_file, enr_file):
# TODO: make these locations configurable?
logger.info('Loading GeoIP data')
gi_org = load_gi_org('data/GeoIPASNum2.csv')
geo_data = pygeoip.GeoIP('data/GeoIP.dat', pygeoip.MEMORY_CACHE)

wheat = []
enriched = []

logger.info('Beginning winnowing process')
for each in crop:
(addr, addr_type, direction, source, note, date) = each
# this should be refactored into appropriate functions
if addr_type == 'IPv4' and is_ipv4(addr):
#logger.info('Enriching %s' % addr)
ipaddr = IPAddress(addr)
if not reserved(ipaddr):
wheat.append(each)
if enrich_ip:
e_data = (addr, addr_type, direction, source, note, date) + enrich_IPv4(ipaddr, geo_data, dnsdb)
e_data = (addr, addr_type, direction, source, note, date) + enrich_IPv4(ipaddr, dnsdb)
enriched.append(e_data)
else:
e_data = (addr, addr_type, direction, source, note, date) + enrich_IPv4(ipaddr, geo_data)
e_data = (addr, addr_type, direction, source, note, date) + enrich_IPv4(ipaddr)
enriched.append(e_data)
else:
logger.error('Found invalid address: %s from: %s' % (addr, source))
elif addr_type == 'FQDN' and is_fqdn(addr):
#logger.info('Enriching %s' % addr)
wheat.append(each)
if enrich_dns and dnsdb:
e_data = (addr, addr_type, direction, source, note, date, enrich_FQDN(addr, date, dnsdb))
enriched.append(e_data)
# print "Enriching %s" % addr
e_data = enrich_FQDN(addr, date, dnsdb)
if e_data:
for each in e_data:
datum = (each[0], "IPv4", direction, source, note, date) + each[1:]
enriched.append(datum)
else:
logger.error('Could not determine address type for %s listed as %s' % (addr, addr_type))

Expand Down

0 comments on commit a2dcb7e

Please sign in to comment.