Skip to content
This repository was archived by the owner on Aug 8, 2024. It is now read-only.

Commit e83c49b

Browse files
author
Rory McCann
committed
Fix bug with parsing out timestamps.
Hacked around it by presuming that %t values (timestamps) are always wrapped in [ and ]. I tried to look at the httpd/Apache source code, and I think it uses APR_TIME_T_FMT, but I couldn't see where that is defined. See #9.
1 parent f96a5e5 commit e83c49b

File tree

2 files changed

+24
-1
lines changed

2 files changed

+24
-1
lines changed

apache_log_parser/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -107,7 +107,7 @@ def format_time(matched_strings):
107107
[make_regex('%r'), '.*?', lambda match: 'request_first_line', extra_request_from_first_line], # First line of request
108108
[make_regex('%R'), '.*?', lambda match: 'handler', lambda matched_strings: matched_strings], # The handler generating the response (if any).
109109
[make_regex('%s'), '[0-9]+?', lambda match: 'status', lambda matched_strings: matched_strings], # Status. For requests that got internally redirected, this is the status of the *original* request --- %>s for the last.
110-
[make_regex('%t'), '.*?', lambda match: 'time_received', format_time], # Time the request was received (standard english format)
110+
[make_regex('%t'), '\[.*?\]', lambda match: 'time_received', format_time], # Time the request was received (standard english format)
111111
[make_regex('%\{[^\}]+?\}t'), '.*?', extract_inner_value("time_", "t") , lambda matched_strings: matched_strings], # The time, in the form given by format, which should be in strftime(3) format. (potentially localized)
112112
[make_regex('%\{[^\}]+?\}x'), '.*?', extract_inner_value("extension_", "x") , lambda matched_strings: matched_strings], # Extension value, e.g. mod_ssl protocol and cipher
113113
[make_regex('%T'), '.*?', lambda match: 'time_s', lambda matched_strings: matched_strings], # The time taken to serve the request, in seconds.

apache_log_parser/tests.py

Lines changed: 23 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -52,5 +52,28 @@ def test_pr8(self):
5252
'env_unique_id': 'VHiZx6wQGCMAAEiBE8kAAAAA:VHiZx6wQGiMAAGPkBnMAAAAH:VHiZx6wQGiMAAGPkBnMAAAAH',
5353
'remote_ip': '192.168.1.100'})
5454

55+
def test_issue9(self):
56+
parser = apache_log_parser.Parser("%h %v %V %l %u %t %r %>s %b %{Referer}i %{User-agent}i")
57+
log = "10.1.1.1 T1 blah.foo.com - - [08/Mar/2015:18:06:58 -0400] GET /content_images/3/American-University-in-Cairo-AUC.jpeg.jpg HTTP/1.1 404 344 http://www.google.ie AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.76 Safari/537.36"
58+
data = parser.parse(log)
59+
self.assertEqual(data, {
60+
'status': '404',
61+
'request_header_referer': 'http://www.google.ie',
62+
'remote_user': '-',
63+
'server_name': 'T1',
64+
'request_http_ver': '1.1',
65+
'request_header_user_agent': '',
66+
'request_first_line': 'GET /content_images/3/American-University-in-Cairo-AUC.jpeg.jpg HTTP/1.1',
67+
'remote_logname': '-',
68+
'request_method': 'GET',
69+
'time_received_datetimeobj': datetime.datetime(2015, 3, 8, 18, 6, 58),
70+
'response_bytes_clf': '344',
71+
'server_name2': 'blah.foo.com',
72+
'request_url': '/content_images/3/American-University-in-Cairo-AUC.jpeg.jpg',
73+
'time_received_isoformat': '2015-03-08T18:06:58',
74+
'remote_host': '10.1.1.1',
75+
'time_received': '[08/Mar/2015:18:06:58 -0400]'
76+
})
77+
5578
if __name__ == '__main__':
5679
unittest.main()

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy