1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 __doc__ = """
21 This module contains the supporting classes for the Two Step Analysis user agent
22 algorithm that is used as the primary way to match user agents with the Java API
23 for the WURFL.
24
25 A description of the way the following source is intended to work can be found
26 within the source for the original Java API implementation here:
27 http://sourceforge.net/projects/wurfl/files/WURFL Java API/
28
29 The original Java code is GPLd and Copyright (c) WURFL-Pro srl
30 """
31
32 __author__ = "Armand Lynch <lyncha@users.sourceforge.net>"
33 __copyright__ = "Copyright 2010, Armand Lynch"
34 __license__ = "LGPL"
35 __url__ = "http://celljam.net/"
36 __version__ = "1.0.1"
37
38 import re
39
40
41
42
43
44
# Pre-compiled patterns for user-agent clean-up.
#
# Plain raw strings are used instead of the Python-2-only ``ur`` prefix, which
# is a syntax error on Python 3.  The pattern text itself is unchanged and is
# pure ASCII, so matching behaviour is the same on both major versions.

# The "(via babelfish.yahoo.com)" marker plus any surrounding whitespace.
babel_fish_re = re.compile(r"\s*\(via babelfish.yahoo.com\)\s*", re.UNICODE)

# "UP.Link" and everything after it up to the end of the string.
uplink_re = re.compile(r"\s*UP\.Link.+$", re.UNICODE)

# The YesWAP proxy marker, with any whitespace in front of it.
yeswap_re = re.compile(r"\s*Mozilla/4\.0 \(YesWAP mobile phone proxy\)",
                       re.UNICODE)

# Group 1: "Mozilla/5.0" up to the "; U;" token.
# Group 2: the "; U;" token and whatever follows it, up to "Safari/".
# Group 3: "Safari/" followed by at most three digits.
safari_re = re.compile(r"(Mozilla\/5\.0.*)(\;\s*U\;.*?)(Safari\/\d{0,3})",
                       re.UNICODE)

# A locale token such as "; en" or "; en-us".
locale_re = re.compile(r"(; [a-z]{2}(-[a-zA-Z]{0,2})?)", re.UNICODE)

# Serial numbers written as "[TF...]", "[NT...]", "[ST...]" or "/SN...".
# NOTE(review): inside a character class "|" is a literal, so "[\d|X]" also
# accepts a pipe character.  Left as-is to preserve existing matching.
serial_number_re = re.compile(r"(\[(TF|NT|ST)[\d|X]+\])|(\/SN[\d|X]+)",
                              re.UNICODE)
61 """Replace the "via babelfish.yahoo.com" with ''"""
62
63 return babel_fish_re.sub('', user_agent)
64
67 """Strip everything that precedes "BlackBerry", unless the user agent contains "AppleWebKit"."""
68
69 try:
70 index = user_agent.index(u"BlackBerry")
71 if u"AppleWebKit" not in user_agent:
72 return user_agent[index:]
73 except ValueError:
74 pass
75 return user_agent
76
79 """Replace the trailing UP.Link ... with ''"""
80
81 return uplink_re.sub('', user_agent)
82
85 """Replace the "YesWAP mobile phone proxy" with ''"""
86
87 return yeswap_re.sub('', user_agent)
88
92
96
def normalizer(user_agent):
    """Run the user agent through each function in ``funcs``, then tidy it.

    ``funcs`` is a free variable supplied by the enclosing scope.  Each
    function is called with the result of the previous one.  Afterwards
    doubled spaces are reduced to single spaces and leading/trailing
    whitespace is stripped.

    :param user_agent: the user agent string to normalize
    :return: the normalized user agent string
    """
    for f in funcs:
        user_agent = f(user_agent)
    # Replacing a single space with a single space was a no-op; the intent
    # is to squeeze the doubled spaces left behind when the functions above
    # cut a token out of the middle of the string.  This is a single pass,
    # so a run of three or more spaces is shortened but not fully collapsed.
    return user_agent.replace('  ', ' ').strip()
104 return normalizer
105
106
# Normalizers combined into a single callable.  The argument order is kept
# exactly as before (presumably the order in which the functions are
# applied -- confirm against _combine_funcs).
generic = _combine_funcs(
    serial_no,
    blackberry,
    uplink,
    yeswap,
    babelfish,
    locale_remover,
)
def combined_normalizer(user_agent):
    # Generic clean-up first, then the specific normalizer on its result.
    return normalizer_func(generic(user_agent))
115 combined_normalizer.__doc__ = normalizer_func.__doc__
116 return combined_normalizer
117
122 if search_string in user_agent:
123 start = user_agent.index(search_string)
124 user_agent = user_agent[start:start + vsn_size]
125 return user_agent
126
127
128 @prenormalized
129 -def chrome(user_agent):
132
133
134 @prenormalized
135 -def firefox(user_agent):
138
139
140 @prenormalized
141 -def konqueror(user_agent):
144
145
146 @prenormalized
def msie(user_agent):
    """Cut the user agent off nine characters after the start of "MSIE".

    The nine characters cover the "MSIE" token itself plus the five
    characters that follow it.  A user agent without "MSIE" is returned
    unchanged.
    """
    position = user_agent.find(u"MSIE")
    if position < 0:
        return user_agent
    return user_agent[:position + 9]
152
153
154 @prenormalized
def safari(user_agent):
    """
    Shorten a Safari user agent by dropping everything between the
    "; U;" token and "Safari/xxx".

    For example
    Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_4_11; fr) AppleWebKit/525.18 (KHTML, like Gecko) Version/3.1.1 Safari/525.18
    is reduced to
    Mozilla/5.0 (Macintosh Safari/525

    A user agent that does not match ``safari_re`` is returned unchanged.
    """

    found = safari_re.search(user_agent)
    if not found or len(found.groups()) < 3:
        return user_agent
    # Keep the leading part and the trailing "Safari/xxx" part; the middle
    # group of the pattern is discarded.
    leading = found.group(1).strip()
    trailing = found.group(3).strip()
    return " ".join([leading, trailing])
169
170
171 @prenormalized
def lg(user_agent):
    """Return the user agent starting at its first "LG".

    A user agent that does not contain "LG" is returned unchanged.
    """
    start = user_agent.find(u"LG")
    if start == -1:
        return user_agent
    return user_agent[start:]
178
179
180 @prenormalized
def maemo(user_agent):
    """Return the user agent starting at its first "Maemo".

    A user agent that does not contain "Maemo" is returned unchanged.
    """
    offset = user_agent.find(u"Maemo")
    return user_agent if offset < 0 else user_agent[offset:]
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217