|
1 | 1 | import re
|
2 |
| -from datetime import datetime |
| 2 | +from datetime import datetime, tzinfo, timedelta |
3 | 3 |
|
4 | 4 | import user_agents
|
5 | 5 |
|
@@ -58,23 +58,81 @@ def parse_user_agent(matched_strings):
|
58 | 58 |
|
59 | 59 | return matched_strings
|
60 | 60 |
|
class FixedOffset(tzinfo):
    """Fixed offset in minutes east from UTC.

    Instances are built from an offset string made of an optional sign
    followed by two hour digits and two minute digits, e.g. "+0530",
    "-0800" or "0000". A missing sign is treated as east of UTC.
    """

    def __init__(self, string='0000'):
        """
        Parse `string` into a fixed UTC offset.

        The argument defaults to a zero offset so that the class can be
        instantiated without arguments; the tzinfo base class does exactly
        that when an instance is copied or unpickled, and then restores the
        real offset and name from the saved instance state.

        Raises ValueError when the hour or minute digits are not numeric.
        """
        if string.startswith('-'):
            direction = -1
            digits = string[1:]
        elif string.startswith('+'):
            direction = 1
            digits = string[1:]
        else:
            # No explicit sign: treat the offset as east of UTC.
            direction = 1
            digits = string

        hr_offset = int(digits[0:2], 10)
        # The minutes field is two characters wide; reading a single
        # character would turn "+0530" into an offset of +05:05.
        min_offset = int(digits[2:4], 10)
        total_minutes = direction * (hr_offset * 60 + min_offset)

        self.__offset = timedelta(minutes=total_minutes)

        # The name is the digit part only (any leading sign is dropped).
        self.__name = digits

    def utcoffset(self, dt):
        """Return the fixed offset from UTC as a timedelta; `dt` is ignored."""
        return self.__offset

    def tzname(self, dt):
        """Return the offset digits this zone was built from; `dt` is ignored."""
        return self.__name

    def dst(self, dt):
        """Return a zero timedelta: a fixed offset has no DST adjustment."""
        return timedelta(0)

    def __repr__(self):
        return repr(self.__name)
61 | 96 |
|
62 | 97 |
|
def apachetime(s):
    """
    Convert an Apache log timestamp into a timezone-aware python datetime.

    The first and last characters of `s` are discarded before parsing, so
    the value is expected to arrive wrapped, e.g.
    "[01/Sep/2012:06:05:11 +0000]". The remaining text is read by fixed
    character positions: day, month abbreviation, year, hour, minute,
    second and finally the UTC offset, which becomes the datetime's tzinfo.
    """
    months = {
        'Jan': 1, 'Feb': 2, 'Mar': 3, 'Apr': 4, 'May': 5, 'Jun': 6,
        'Jul': 7, 'Aug': 8, 'Sep': 9, 'Oct': 10, 'Nov': 11, 'Dec': 12,
    }
    # Drop the wrapping characters around the timestamp.
    stamp = s[1:-1]

    # The offset occupies the five characters after the space, e.g. "+0000".
    zone = FixedOffset(stamp[21:26])

    year = int(stamp[7:11])
    month = months[stamp[3:6]]
    day = int(stamp[0:2])
    hour = int(stamp[12:14])
    minute = int(stamp[15:17])
    second = int(stamp[18:20])

    return datetime(year, month, day, hour, minute, second, tzinfo=zone)
73 | 115 |
|
def format_time(matched_strings):
    """
    Expand the 'time_received' entry of `matched_strings` into datetimes.

    Returns a dict holding the original string plus three views of the
    same instant, each as a datetime object and as an ISO 8601 string:
    a naive one (timezone dropped), one in the logged timezone, and one
    converted to a zero UTC offset.
    """
    raw = matched_strings['time_received']

    # Timezone-aware datetime in the offset recorded in the log line.
    aware = apachetime(raw)

    # time_received_datetimeobj has always been a naive datetime; keep it
    # that way for backwards compatibility by stripping the tzinfo.
    naive = aware.replace(tzinfo=None)

    # Same instant expressed with a zero offset from UTC.
    in_utc = aware.astimezone(FixedOffset('0000'))

    result = {}
    result['time_received'] = raw
    result['time_received_datetimeobj'] = naive
    result['time_received_isoformat'] = naive.isoformat()
    result['time_received_tz_datetimeobj'] = aware
    result['time_received_tz_isoformat'] = aware.isoformat()
    result['time_received_utc_datetimeobj'] = in_utc
    result['time_received_utc_isoformat'] = in_utc.isoformat()
    return result
78 | 136 |
|
79 | 137 |
|
80 | 138 | FORMAT_STRINGS = [
|
|
0 commit comments