Package pywurfl :: Package algorithms :: Package wurfl :: Module normalizers
[hide private]
[frames] | no frames]

Source Code for Module pywurfl.algorithms.wurfl.normalizers

  1  # pywurfl - Wireless Universal Resource File Tools in Python 
  2  # Copyright (C) 2006-2010 Armand Lynch 
  3  # 
  4  # This library is free software; you can redistribute it and/or modify it 
  5  # under the terms of the GNU Lesser General Public License as published by the 
  6  # Free Software Foundation; either version 2.1 of the License, or (at your 
  7  # option) any later version. 
  8  # 
  9  # This library is distributed in the hope that it will be useful, but WITHOUT 
 10  # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 
 11  # FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more 
 12  # details. 
 13  # 
 14  # You should have received a copy of the GNU Lesser General Public License 
 15  # along with this library; if not, write to the Free Software Foundation, Inc., 
 16  # 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 
 17  # 
 18  # Armand Lynch <lyncha@users.sourceforge.net> 
 19   
 20  __doc__ = """ 
 21  This module contains the supporting classes for the Two Step Analysis user agent 
 22  algorithm that is used as the primary way to match user agents with the Java API 
 23  for the WURFL. 
 24   
 25  A description of the way the following source is intended to work can be found 
 26  within the source for the original Java API implementation here: 
 27  http://sourceforge.net/projects/wurfl/files/WURFL Java API/ 
 28   
 29  The original Java code is GPLd and Copyright (c) WURFL-Pro srl 
 30  """ 
 31   
 32  __author__ = "Armand Lynch <lyncha@users.sourceforge.net>" 
 33  __copyright__ = "Copyright 2010, Armand Lynch" 
 34  __license__ = "LGPL" 
 35  __url__ = "http://celljam.net/" 
 36  __version__ = "1.0.1" 
 37   
 38  import re 
 39   
 40  #from pywurfl.algorithms.wurfl import utils 
 41   
 42   
 43  # generic user agent normalizers 
 44   
 45  babel_fish_re = re.compile(ur"\s*\(via babelfish.yahoo.com\)\s*", re.UNICODE) 
 46  uplink_re = re.compile(ur"\s*UP\.Link.+$", re.UNICODE) 
 47  yeswap_re = re.compile(ur"\s*Mozilla/4\.0 \(YesWAP mobile phone proxy\)", 
 48                         re.UNICODE) 
 49  safari_re = re.compile(ur"(Mozilla\/5\.0.*)(\;\s*U\;.*?)(Safari\/\d{0,3})", 
 50                         re.UNICODE) 
 51  locale_re = re.compile(ur"(; [a-z]{2}(-[a-zA-Z]{0,2})?)", re.UNICODE) 
 52  serial_number_re = re.compile(ur"(\[(TF|NT|ST)[\d|X]+\])|(\/SN[\d|X]+)", 
 53                                re.UNICODE) 
#ibm_wbi_re = re.compile(ur"\(via IBM WBI \d+\.\d+\)", re.UNICODE)
#novarra_google_re = re.compile(ur"(\sNovarra-Vision.*)|(,gzip\(gfe\)\s+\(via translate.google.com\))", re.UNICODE)
#gmcc_re = re.compile(ur"GMCC/\d\.\d", re.UNICODE)
#lguplus_re = re.compile(ur"Mozilla.*lgtelecom;.*;(.*);.*", re.UNICODE)


def babelfish(user_agent):
    """Strip every match of ``babel_fish_re`` from the user agent.

    Returns the user agent with each "(via babelfish.yahoo.com)" marker
    replaced by the empty string.
    """
    cleaned = babel_fish_re.sub('', user_agent)
    return cleaned
64
def blackberry(user_agent):
    """Drop whatever precedes the first "BlackBerry" in the user agent.

    The user agent is returned unchanged when it does not contain
    "BlackBerry", or when it also contains "AppleWebKit".
    """
    position = user_agent.find(u"BlackBerry")
    if position == -1 or u"AppleWebKit" in user_agent:
        return user_agent
    return user_agent[position:]
76 82
def yeswap(user_agent):
    """Strip every match of ``yeswap_re`` from the user agent.

    Returns the user agent with each "YesWAP mobile phone proxy" marker
    replaced by the empty string.
    """
    cleaned = yeswap_re.sub('', user_agent)
    return cleaned
88
def locale_remover(user_agent):
    """Remove the first match of ``locale_re`` from the user agent.

    Only one occurrence is removed; later matches are left in place.
    """
    without_locale = locale_re.sub('', user_agent, count=1)
    return without_locale
92
def serial_no(user_agent):
    """Remove the first match of ``serial_number_re`` from the user agent.

    Only one occurrence is removed; later matches are left in place.
    """
    without_serial = serial_number_re.sub("", user_agent, count=1)
    return without_serial
96
def _combine_funcs(*funcs):
    """Chain several normalizer functions into a single normalizer.

    ``funcs`` are callables that take a user agent string and return a user
    agent string.  The returned function applies them in the order given,
    then squeezes pairs of spaces and strips leading/trailing whitespace.
    """
    def normalizer(user_agent):
        for f in funcs:
            user_agent = f(user_agent)
        # Removing a token can leave two adjacent spaces behind.  Replace each
        # non-overlapping pair of spaces with one space (single pass); a
        # replace(' ', ' ') would swap a space for itself and do nothing.
        return user_agent.replace('  ', ' ').strip()
    return normalizer


# NOTE(review): ``uplink`` is not visible in this part of the listing (the
# lines between ``blackberry`` and ``yeswap`` are folded) -- it must be
# defined above this assignment; confirm.
generic = _combine_funcs(serial_no, blackberry, uplink, yeswap, babelfish,
                         locale_remover)
def prenormalized(normalizer_func):
    """Decorator that runs ``generic`` before ``normalizer_func``.

    The returned function passes the user agent through the module-level
    ``generic`` normalizer and then hands the result to ``normalizer_func``.
    ``functools.wraps`` copies the wrapped function's metadata (``__doc__``,
    ``__name__``, ...) so decorated normalizers stay identifiable in
    tracebacks and introspection instead of all appearing as
    ``combined_normalizer``.
    """
    # Imported locally so this block does not depend on the file's import list.
    from functools import wraps

    @wraps(normalizer_func)
    def combined_normalizer(user_agent):
        user_agent = generic(user_agent)
        return normalizer_func(user_agent)
    return combined_normalizer
# specific user agent normalizers

def _specific_normalizer(user_agent, search_string, vsn_size):
    """Cut the user agent down to a fixed-width window starting at a token.

    If ``search_string`` occurs in ``user_agent``, return the slice of at most
    ``vsn_size`` characters beginning at its first occurrence; otherwise
    return ``user_agent`` unchanged.
    """
    begin = user_agent.find(search_string)
    if begin < 0:
        return user_agent
    return user_agent[begin:begin + vsn_size]
126
@prenormalized
def chrome(user_agent):
    """Keep only the 8 characters starting at "Chrome", if present."""
    token = u"Chrome"
    width = 8
    return _specific_normalizer(user_agent, token, width)
132
@prenormalized
def firefox(user_agent):
    """Keep only the 11 characters starting at "Firefox", if present."""
    token = u"Firefox"
    width = 11
    return _specific_normalizer(user_agent, token, width)
138
@prenormalized
def konqueror(user_agent):
    """Keep only the 11 characters starting at "Konqueror", if present."""
    token = u"Konqueror"
    width = 11
    return _specific_normalizer(user_agent, token, width)
144
@prenormalized
def msie(user_agent):
    """Truncate the user agent 9 characters after the start of "MSIE".

    Everything before "MSIE" is kept.  A user agent that does not contain
    "MSIE" is returned unchanged.
    """
    cut = user_agent.find(u"MSIE")
    if cut == -1:
        return user_agent
    return user_agent[:cut + 9]
152
@prenormalized
def safari(user_agent):
    """
    Collapse a Safari user agent to its prefix and its "Safari/xxx" token,
    dropping everything from "; U;" up to "Safari/".

    e.g Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_4_11; fr) AppleWebKit/525.18 (KHTML, like Gecko) Version/3.1.1 Safari/525.18
    becomes
    Mozilla/5.0 (Macintosh Safari/525

    A user agent that ``safari_re`` does not match is returned unchanged.
    """
    found = safari_re.search(user_agent)
    if not found or len(found.groups()) < 3:
        return user_agent
    prefix = found.group(1).strip()
    suffix = found.group(3).strip()
    return " ".join([prefix, suffix])
169
@prenormalized
def lg(user_agent):
    """Discard everything before the first "LG" in the user agent.

    A user agent that does not contain "LG" is returned unchanged.
    """
    offset = user_agent.find(u"LG")
    return user_agent if offset == -1 else user_agent[offset:]
178
@prenormalized
def maemo(user_agent):
    """Discard everything before the first "Maemo" in the user agent.

    A user agent that does not contain "Maemo" is returned unchanged.
    """
    offset = user_agent.find(u"Maemo")
    return user_agent if offset == -1 else user_agent[offset:]
187 188 189 #def novarra_google_translator_remover(user_agent): 190 # return novarra_google_re.sub('', user_agent, 1) 191 192 #def opera(user_agent): 193 # #print "opera normalizer" 194 # return _specific_normalizer(user_agent, u"Opera", 7) 195 196 #def android(user_agent): 197 # #print "android normalizer" 198 # start = utils.ordinal_index(user_agent, ";", 3) 199 # end = utils.ordinal_index(user_agent, ";", 4) 200 # if start == -1 or end == -1: 201 # return user_agent 202 # return user_agent[:start] + user_agent[end:] 203 204 #def lguplus(user_agent): 205 # match = lguplus_re.search(user_agent) 206 # if match: 207 # user_agent = match.group(1) 208 # return user_agent 209 210 #def ibm_wbi(user_agent): 211 # #print "normalizer ibm_wbi" 212 # return ibm_wbi_re.sub('', user_agent) 213 214 #def gmcc(user_agent): 215 # #print "normalizer gmcc" 216 # return gmcc_re.sub('', user_agent) 217