-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathapache_time_based.py
More file actions
33 lines (27 loc) · 827 Bytes
/
apache_time_based.py
File metadata and controls
33 lines (27 loc) · 827 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
__author__ = 'syedaali'
'''
This program prints the top 10 most eventful times in an Apache HTTPD
access log file. The format it expects the log to be in is:
1.1.1.1 - - [28/Dec/2014:03:21:08 +0000] "GET /index.html HTTP/1.0" 200 \
15 "-" "-"
"Most eventful times" refers to the timestamps at which the most events
occurred.
'''
import re
import collections
# Pattern for one access-log line, written in verbose mode so each field
# sits on its own line. It only matches when the second and third fields
# are both a literal "-"; the bracketed timestamp is captured as "TS".
p = re.compile(
    r"""(?P<ip>\d+\.\d+\.\d+\.\d+)\s+
(?P<username>-)\s+
(?P<TZ>-)\s+
(?P<TS>\[.*?\])\s+
(?P<rest>.*)""",
    flags=re.X,
)

# Occurrence count per timestamp string; unseen keys start at 0.
adict = collections.defaultdict(int)
# Tally how many log lines share each bracketed timestamp.
with open('access_log', 'r') as f:
    for line in f:
        match = p.search(line)
        if match is None:
            # Blank or differently formatted lines have no timestamp to
            # count; skip them instead of failing on match.group().
            continue
        adict[match.group('TS')] += 1

# Print the ten timestamps with the highest counts, busiest first.
# Slicing after the sort yields exactly ten entries (or fewer when the log
# has fewer distinct timestamps); the earlier counter/break stopped at nine.
top_ten = sorted(adict.items(), key=lambda x: x[1], reverse=True)[:10]
for k, v in top_ten:
    # %-formatting reproduces the "timestamp count" output of the Python 2
    # print statement and also runs unchanged on Python 3.
    print("%s %s" % (k, v))