-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
99 lines (82 loc) · 2.29 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
__author__ = "Richard O'Dwyer"
__email__ = "richard@richard.do"
__license__ = "None"
import re
def process_log(log):
    """Count how often each requested file appears in an access log.

    ``log`` is handed to ``get_requests``; the parsed records are reduced to
    their requested-file field by ``get_files`` and that list is tallied by
    ``file_occur``, whose result is returned unchanged.
    """
    return file_occur(get_files(get_requests(log)))
def get_requests(f):
    """Parse the GET requests out of an access log.

    Args:
        f: Readable text file object; its entire content is read in one go.

    Returns:
        A list with one 5-tuple of strings per regex match, in the order
        ``(ip, datetime, requested_file, referrer, user_agent)``.  The list
        is empty when nothing in the log matches, so callers can always
        iterate over the result.
    """
    log_text = f.read()
    # Each fragment carries its own ``r`` prefix: adjacent literals are
    # concatenated, but a prefix only applies to the literal it is attached
    # to, so un-prefixed fragments would depend on invalid escape sequences.
    pat = (
        r'(\d+.\d+.\d+.\d+)\s-\s-\s'    # IP address
        r'\[(.+)\]\s'                   # datetime
        r'"GET\s(.+)\s\w+/.+"\s\d+\s'   # requested file
        r'\d+\s"(.+)"\s'                # referrer
        r'"(.+)"'                       # user agent
    )
    # NOTE(review): the dots in the IP group are unescaped and therefore
    # match any character; kept as-is so the set of matched lines does not
    # change.
    return re.findall(pat, log_text)
def find(pat, text, match_item):
    """Return every non-overlapping match of ``pat`` in ``text``.

    ``match_item`` is accepted but not used.  The result is the list
    produced by ``re.findall``, or ``False`` when that list is empty.
    """
    found = re.findall(pat, text)
    return found if found else False
def get_files(requests):
    """Extract the requested-file field from parsed log records.

    Args:
        requests: Iterable of indexable records, or a falsy value
            (``False``, ``None``, empty list) when nothing was parsed.

    Returns:
        A list holding element 2 of every record, in input order; an empty
        list when ``requests`` is falsy.
    """
    # The parsing step can hand over False instead of a list when the log
    # had no matching lines; treat that as "no requests" rather than
    # failing while trying to iterate it.
    if not requests:
        return []
    # Index 2 is the requested file; change the index to total up a
    # different field of the record.
    return [req[2] for req in requests]
def file_occur(files):
    """Tally how many times each entry occurs in ``files``.

    Returns a dict mapping every distinct entry to its occurrence count.
    """
    counts = {}
    for name in files:
        if name in counts:
            counts[name] += 1
        else:
            counts[name] = 1
    return counts
def group_by_first_segment(totals, blacklist):
    """Sum request totals per first path segment, skipping blacklisted ones.

    Args:
        totals: Dict mapping a requested path (e.g. ``/post/page``) to its
            request count.
        blacklist: Set of strings.  A path is dropped when its first
            segment, or the first character of that segment, is in the set.

    Returns:
        Dict mapping each remaining first segment to the summed count of
        all paths that share it.
    """
    grouped = {}
    for path, count in totals.items():
        parts = path.split('/')
        if len(parts) < 2:
            # No '/' in the request target, so there is no first segment to
            # group by; skip it instead of failing on parts[1].
            continue
        segment = parts[1]
        # The one-character test drops segments that start with a
        # blacklisted character such as "?" (query-only URLs).
        if segment in blacklist or segment[:1] in blacklist:
            continue
        grouped[segment] = grouped.get(segment, 0) + count
    return grouped


if __name__ == '__main__':
    # First path segments (and leading characters) that are not counted.
    blacklist = {
        "tag",
        "wp-content",
        "wp-includes",
        "wp-admin",
        "wp-activate",
        "assets",
        "badges",
        "contacto",
        "usuarios",
        "page",
        "category",
        "Admin",
        "admin",
        "author",
        "images",
        "sitemap.xml",
        "robots.txt",
        "?"
    }
    # nginx access log, standard format; the context manager closes the
    # file once it has been parsed.
    with open('20150217-access.log', 'r') as log_file:
        # dict of requested files and their total requests
        library = process_log(log_file)
    # Single-argument parenthesised print emits the same text whether it is
    # parsed as a Python 2 statement or a Python 3 function call.
    for key, value in library.items():
        print("key: %s , value: %s" % (key, value))
    postdictionary = group_by_first_segment(library, blacklist)
    for key3, value3 in postdictionary.items():
        print("key: %s , value: %s" % (key3, value3))
    print(len(postdictionary))