-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathLogEntry.java
115 lines (86 loc) · 2.74 KB
/
LogEntry.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
package haloog;
import java.net.URI;
import java.net.URISyntaxException;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
/**
* Represents a single log entry, making it easier to extract information from it.
*/
import java.util.Locale;
import java.util.regex.*;
/**
 * Immutable view of one access-log line in the "combined" log layout
 * (host, identity, user, timestamp, request, status, size, referer, user agent).
 *
 * <p>Parsing is best-effort: a line that does not match the expected layout is
 * reported on {@code System.err} and leaves every parsed field {@code null}
 * instead of throwing. Callers must therefore be prepared for {@code null}
 * from the plain getters; {@link #getRefererDomain()} never returns {@code null}.
 */
public class LogEntry {

    /** Value returned by {@link #getRefererDomain()} when no domain can be determined. */
    public static final String NO_DOMAIN = "unknown domain";

    /** Structure of a log entry; compiled once because every instance uses it. */
    private static final Pattern LOG_ENTRY_PATTERN = Pattern.compile(
            "^([\\w.]+) (\\S+) (.+?) \\[([\\w:/]+\\s[+\\-]\\d{4})\\] \"(.+?)\" (\\d{3}) (\\S+) \"(.*?)\" \"(.*?)\"");

    /**
     * Pattern for recognizing a URL, based off RFC 3986.
     * Group 1 is the scheme ("http://", "ftp://", ...) or the literal "www."
     * prefix, group 2 is only set when a real scheme was present, and group 4
     * is the remainder of the URL.
     */
    private static final Pattern URL_PATTERN = Pattern.compile(
            "(?:^|[\\W])((ht|f)tp(s?):\\/\\/|www\\.)"
                    + "(([\\w\\-]+\\.){1,}?([\\w\\-.~]+\\/?)*"
                    + "[\\p{Alnum}.,%_=?&#\\-+()\\[\\]\\*$~@!:/{};']*)",
            Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL);

    /**
     * Timestamp layout, e.g. {@code 10/Oct/2000:13:55:36 -0700}. The RFC 822
     * zone letter {@code Z} is used because a single {@code X} only reads the
     * hour part of the offset and silently drops the minutes.
     */
    private static final String TIMESTAMP_FORMAT = "dd/MMM/yyyy:HH:mm:ss Z";

    private static final String WWW_PREFIX = "www.";

    /** The entire log entry as string **/
    private final String logEntryLine;

    /** The date of the log, or {@code null} if it could not be parsed **/
    private final Date loggedTime;

    /** The logged request, or {@code null} if the line did not match **/
    private final String loggedRequest;

    /** The logged referer, or {@code null} if the line did not match **/
    private final String loggedReferer;

    /**
     * Parses the given log line.
     *
     * @param logEntryLine the raw log line; a {@code null} or malformed line is
     *                     reported on {@code System.err} and yields an entry
     *                     whose parsed fields are all {@code null}
     */
    public LogEntry(String logEntryLine) {
        this.logEntryLine = logEntryLine;

        Date time = null;
        String request = null;
        String referer = null;

        Matcher matcher = (logEntryLine == null) ? null : LOG_ENTRY_PATTERN.matcher(logEntryLine);
        if (matcher != null && matcher.matches()) {
            // Take the plain strings first so they survive a bad timestamp.
            request = matcher.group(5);
            referer = matcher.group(8);
            time = parseTimestamp(matcher.group(4));
        } else {
            System.err.println("Bad log entry (or problem with RE?): " + logEntryLine);
        }

        this.loggedTime = time;
        this.loggedRequest = request;
        this.loggedReferer = referer;
    }

    /**
     * Converts the bracketed timestamp of a log line into a {@link Date}.
     *
     * @param timestamp text in {@link #TIMESTAMP_FORMAT} layout
     * @return the parsed instant, or {@code null} if the text is not a valid timestamp
     */
    private static Date parseTimestamp(String timestamp) {
        // SimpleDateFormat is not thread-safe, so a fresh instance is used per call.
        DateFormat format = new SimpleDateFormat(TIMESTAMP_FORMAT, Locale.US);
        try {
            return format.parse(timestamp);
        } catch (ParseException e) {
            System.err.println("Bad timestamp in log entry: " + timestamp + " (" + e.getMessage() + ")");
            return null;
        }
    }

    /**
     * @return a copy of the logged time, or {@code null} if the entry could not be parsed
     */
    public Date getLoggedTime() {
        // Date is mutable; hand out a copy so callers cannot alter this entry.
        return (loggedTime == null) ? null : new Date(loggedTime.getTime());
    }

    /**
     * @return the logged request line, or {@code null} if the entry could not be parsed
     */
    public String getLoggedRequest() {
        return loggedRequest;
    }

    /**
     * @return the logged referer, or {@code null} if the entry could not be parsed
     */
    public String getLoggedReferer() {
        return loggedReferer;
    }

    /**
     * Tries to extract the referer's domain. If it doesn't succeed, returns a string that reflects this fact.
     *
     * <p>A single leading "www." label is removed from the host. Referers that
     * start with "www." and carry no scheme are treated as HTTP URLs.
     *
     * @return the referer's host without a leading "www.", or {@link #NO_DOMAIN}
     */
    public String getRefererDomain() {
        if (loggedReferer == null) {
            return NO_DOMAIN;
        }

        // Look for an URL in the string
        Matcher matcher = URL_PATTERN.matcher(loggedReferer);
        if (!matcher.find()) {
            return NO_DOMAIN;
        }

        // Rebuild the URL from its capture groups: the full match may start with
        // the non-word delimiter that preceded the URL, which URI would reject.
        String url = matcher.group(1) + matcher.group(4);
        if (matcher.group(2) == null) {
            // Only the "www." alternative matched; URI needs a scheme to see a host.
            url = "http://" + url;
        }

        String host;
        try {
            host = new URI(url).getHost();
        } catch (URISyntaxException e) {
            return NO_DOMAIN;
        }
        if (host == null) {
            return NO_DOMAIN;
        }

        // Strip the prefix only; "www." may legitimately occur inside the host.
        boolean hasWwwPrefix = host.regionMatches(true, 0, WWW_PREFIX, 0, WWW_PREFIX.length());
        return hasWwwPrefix ? host.substring(WWW_PREFIX.length()) : host;
    }
}