Package cssutils :: Package tests :: Package encutils
[hide private]
[frames | no frames]

Source Code for Package cssutils.tests.encutils

  1  """ 
  2  tests for encutils.py 
  3  """ 
  4  __version__ = '0.8' 
  5   
  6  import httplib 
  7  from StringIO import StringIO 
  8  import sys 
  9  import unittest 
 10   
 11  try: 
 12      import encutils 
 13  except ImportError: 
 14      import cssutils.encutils as encutils 
 15   
 16  # helper log 
 17  log = encutils.buildlog(stream=StringIO())     
 18   
class AutoEncodingTestCase(unittest.TestCase):
    """Table-driven tests for the helper functions of ``encutils``.

    Tables that map a unique *input* to its expected result are kept as
    dicts.  Tables in which several inputs share the same expected result
    are sequences of pairs: a dict literal keyed by the expected value keeps
    only the last entry for each key, which silently drops test cases.
    """

    def _fakeRes(self, content):
        """build a fake HTTP response

        ``content`` is the raw header text.  It is wrapped in a ``StringIO``
        and parsed by ``httplib.HTTPMessage``; the returned object exposes
        that message through ``info()``.
        """
        class FakeRes:
            def __init__(self, content):
                fp = StringIO(content)
                self._info = httplib.HTTPMessage(fp)

            def info(self):
                return self._info

        return FakeRes(content)

    def test_getTextTypeByMediaType(self):
        "encutils._getTextTypeByMediaType"
        # media type -> expected text type constant
        tests = {
            'application/xml': encutils._XML_APPLICATION_TYPE,
            'application/xml-dtd': encutils._XML_APPLICATION_TYPE,
            'application/xml-external-parsed-entity':
                encutils._XML_APPLICATION_TYPE,
            'application/xhtml+xml': encutils._XML_APPLICATION_TYPE,
            'text/xml': encutils._XML_TEXT_TYPE,
            'text/xml-external-parsed-entity': encutils._XML_TEXT_TYPE,
            'text/xhtml+xml': encutils._XML_TEXT_TYPE,
            'text/html': encutils._HTML_TEXT_TYPE,
            'text/css': encutils._TEXT_TYPE,
            'text/plain': encutils._TEXT_TYPE,
            'x/x': encutils._OTHER_TYPE,
            'ANYTHING': encutils._OTHER_TYPE
        }
        for test, exp in tests.items():
            self.assertEqual(
                exp, encutils._getTextTypeByMediaType(test, log=log))

    def test_getTextType(self):
        "encutils._getTextType"
        # text (optionally starting with a BOM) -> expected text type constant
        tests = {
            u'\x00\x00\xFE\xFF<?xml version="1.0"':
                encutils._XML_APPLICATION_TYPE,
            u'\xFF\xFE\x00\x00<?xml version="1.0"':
                encutils._XML_APPLICATION_TYPE,
            u'\xFE\xFF<?xml version="1.0"': encutils._XML_APPLICATION_TYPE,
            u'\xFF\xFE<?xml version="1.0"': encutils._XML_APPLICATION_TYPE,
            u'\xef\xbb\xbf<?xml version="1.0"':
                encutils._XML_APPLICATION_TYPE,
            u'<?xml version="1.0"': encutils._XML_APPLICATION_TYPE,
            u'\x00\x00\xFE\xFFanything': encutils._OTHER_TYPE,
            u'\xFF\xFE\x00\x00anything': encutils._OTHER_TYPE,
            u'\xFE\xFFanything': encutils._OTHER_TYPE,
            u'\xFF\xFEanything': encutils._OTHER_TYPE,
            u'\xef\xbb\xbfanything': encutils._OTHER_TYPE,
            u'x/x': encutils._OTHER_TYPE,
            u'ANYTHING': encutils._OTHER_TYPE
        }
        for test, exp in tests.items():
            self.assertEqual(
                exp, encutils._getTextType(test, log=log))

    def test_encodingByMediaType(self):
        "encutils.encodingByMediaType"
        # media type -> expected default encoding (None: no default)
        tests = {
            'application/xml': 'utf-8',
            'application/xml-dtd': 'utf-8',
            'application/xml-external-parsed-entity': 'utf-8',
            'application/ANYTHING+xml': 'utf-8',
            ' application/xml ': 'utf-8',
            'text/xml': 'ascii',
            'text/xml-external-parsed-entity': 'ascii',
            'text/ANYTHING+xml': 'ascii',
            'text/html': 'iso-8859-1',
            'text/css': 'iso-8859-1',
            'text/plain': 'iso-8859-1',
            'ANYTHING': None
        }
        for test, exp in tests.items():
            self.assertEqual(
                exp, encutils.encodingByMediaType(test, log=log))

    def test_getMetaInfo(self):
        "encutils.getMetaInfo"
        # <meta> markup -> expected (media type, encoding)
        tests = {
            """<meta tp-equiv='Content-Type' content='text/html; charset=ascii'>""":
                (None, None),
            """<meta http-equiv='ontent-Type' content='text/html; charset=ascii'>""":
                (None, None),

            """<meta http-equiv='Content-Type' content='text/html'>""":
                ('text/html', None),

            """<meta content='text/html' http-equiv='Content-Type'>""":
                ('text/html', None),
            """<meta content='text/html;charset=ascii' http-equiv='Content-Type'>""":
                ('text/html', 'ascii'),

            """<meta http-equiv='Content-Type' content='text/html ;charset=ascii'>""":
                ('text/html', 'ascii'),
            """<meta content='text/html;charset=iso-8859-1' http-equiv='Content-Type'>""":
                ('text/html', 'iso-8859-1'),
            """<meta http-equiv="Content-Type" content="text/html;charset = ascii">""":
                ('text/html', 'ascii'),

            """<meta http-equiv="Content-Type" content="text/html;charset=ascii;x=2">""":
                ('text/html', 'ascii'),
            """<meta http-equiv="Content-Type" content="text/html;x=2;charset=ascii">""":
                ('text/html', 'ascii'),
            """<meta http-equiv="Content-Type" content="text/html;x=2;charset=ascii;y=2">""":
                ('text/html', 'ascii'),

            """<meta http-equiv='Content-Type' content="text/html;charset=ascii">""":
                ('text/html', 'ascii'),
            """<meta http-equiv='Content-Type' content='text/html;charset=ascii' />""":
                ('text/html', 'ascii'),
            """<meta http-equiv = " Content-Type" content = " text/html;charset=ascii " >""":
                ('text/html', 'ascii'),
            """<meta http-equiv = " \n Content-Type " content = " \t text/html ; charset=ascii " >""":
                ('text/html', 'ascii')
        }
        for test, exp in tests.items():
            self.assertEqual(exp, encutils.getMetaInfo(test, log=log))

    def test_detectXMLEncoding(self):
        "encutils.detectXMLEncoding"
        # (expected encoding, text) pairs.  This must be a sequence, not a
        # dict keyed by the expected encoding: 'ascii' and 'utf-8' are each
        # expected for several inputs and a dict would keep only the last
        # input per encoding.
        tests = (
            # BOM
            ('utf_32_be', u'\x00\x00\xFE\xFFanything'),
            ('utf_32_le', u'\xFF\xFE\x00\x00anything'),
            ('utf_16_be', u'\xFE\xFFanything'),
            ('utf_16_le', u'\xFF\xFEanything'),
            ('utf-8', u'\xef\xbb\xbfanything'),
            # encoding=
            ('ascii', '<?xml version="1.0" encoding="ascii" ?>'),
            ('ascii', "<?xml version='1.0' encoding='ascii' ?>"),
            ('iso-8859-1', "<?xml version='1.0' encoding='iso-8859-1' ?>"),
            # default
            ('utf-8', '<?xml version="1.0" ?>'),
            ('utf-8', '<?xml version="1.0"?><x encoding="ascii"/>'),
        )
        for exp, test in tests:
            res = encutils.detectXMLEncoding(test, log=log)
            # Expected values repeat, so name the input in the message.
            self.assertEqual(
                exp, res, '%r != %r for text %r' % (exp, res, test))

    def test_tryEncodings(self):
        "encutils.tryEncodings"
        # The expected results depend on whether the optional ``chardet``
        # package can be imported; the import is only an availability probe.
        try:
            import chardet
        except ImportError:
            tests = [
                ('ascii', 'abc'),
                ('iso-8859-1', u'\xf6'),
                ('utf-8', u'\u1111')
            ]
        else:
            tests = [
                ('ascii', 'abc'),
                ('windows-1252', u'\xf6'),
                ('ascii', u'\u1111')
            ]
        for exp, test in tests:
            self.assertEqual(exp, encutils.tryEncodings(test))

    # ((expectedencoding, expectedmismatch), (httpheader, filecontent)) pairs.
    #
    # Kept as an ordered sequence on purpose: the same expected result is
    # valid for many different inputs, so keying a dict literal by the
    # expected result would retain only one input per result and silently
    # skip all others.
    _fulltest_cases = (
        (('utf-8', False), (
            '''NoContentType''', '''OnlyText''')),

        # --- application/xhtml+xml ---
        # default enc
        (('utf-8', False), (
            '''Content-Type: application/xhtml+xml''',
            '''<?xml version="1.0" ?>
                <example>
                    <meta http-equiv="Content-Type"
                        content="application/xhtml+xml"/>
                </example>''')),
        # header enc
        (('iso-h', True), (
            '''Content-Type: application/xhtml+xml;charset=iso-H''',
            '''<?xml version="1.0" ?>
                <example>
                    <meta http-equiv="Content-Type"
                        content="application/xhtml+xml"/>
                </example>''')),
        # mismatch header - meta, meta ignored
        (('iso-h', True), (
            '''Content-Type: application/xhtml+xml;charset=iso-H''',
            '''<?xml version="1.0" ?>
                <example>
                    <meta http-equiv="Content-Type"
                        content="application/xhtml+xml;charset=iso_M"/>
                </example>''')),
        # mismatch XML - meta, meta ignored
        (('iso-x', False), (
            '''Content-Type: application/xhtml+xml''',
            '''<?xml version="1.0" encoding="iso-X" ?>
                <example>
                    <meta http-equiv="Content-Type"
                        content="application/xhtml+xml;charset=iso_M"/>
                </example>''')),
        # mismatch header and XML, header wins
        (('iso-h', True), (
            '''Content-Type: application/xhtml+xml;charset=iso-H''',
            '''<?xml version="1.0" encoding="iso-X" ?>
                <example/>''')),

        # --- text/xml ---
        # default enc
        (('ascii', False), (
            '''Content-Type: text/xml''',
            '''<?xml version="1.0" ?>
                <example>
                    <meta http-equiv="Content-Type"
                        content="text/xml"/>
                </example>''')),
        # header enc
        (('iso-h', True), (
            '''Content-Type: text/xml;charset=iso-H''',
            '''<?xml version="1.0" ?>
                <example>
                    <meta http-equiv="Content-Type"
                        content="text/xml"/>
                </example>''')),
        # mismatch header - meta, meta ignored
        (('iso-h', True), (
            '''Content-Type: text/xml;charset=iso-H''',
            '''<?xml version="1.0" ?>
                <example>
                    <meta http-equiv="Content-Type"
                        content="text/xml;charset=iso_M"/>
                </example>''')),
        # XML - meta, both ignored, use HTTP, meta completely ignored
        (('ascii', False), (
            '''Content-Type: text/xml''',
            '''<?xml version="1.0" encoding="iso-X" ?>
                <example>
                    <meta http-equiv="Content-Type"
                        content="text/xml;charset=iso_M"/>
                </example>''')),
        # mismatch header and XML, XML ignored
        (('iso-h', True), (
            '''Content-Type: text/xml;charset=iso-H''',
            '''<?xml version="1.0" encoding="iso-X" ?>
                <example/>''')),

        # --- text/html ---
        # no default enc
        ((None, False), ('Content-Type: text/html;',
            '''<meta http-equiv="Content-Type"
                content="text/html">''')),
        # header enc
        (('iso-h', True), ('Content-Type: text/html;charset=iso-H',
            '''<meta http-equiv="Content-Type"
                content="text/html">''')),
        # meta enc
        (('iso-m', False), ('Content-Type: text/html',
            '''<meta http-equiv="Content-Type"
                content="text/html;charset=iso-m">''')),
        # mismatch header - meta, header wins
        (('iso-h', True), ('Content-Type: text/html;charset=iso-H',
            '''<meta http-equiv="Content-Type"
                content="text/html;charset=iso-m">''')),

        # no header:
        ((None, False), (None,
            '''<meta http-equiv="Content-Type"
                content="text/html;charset=iso-m">''')),
        ((None, False), (None, '''text''')),
        (('utf-8', False), (None, '''<?xml version=''')),
        (('iso-x', False), (None, '''<?xml version="1.0" encoding="iso-X"?>''')),
    )

    # Backward-compatible mapping view of the cases above.  As with the
    # former dict literal, entries sharing an expected result collapse to the
    # last one, so do not use this to drive the test.
    fulltests = dict(_fulltest_cases)

    def test_getEncodingInfo(self):
        "encutils.getEncodingInfo"
        for exp, (header, text) in self._fulltest_cases:
            if header:
                # Header given: feed it through a fake HTTP response object.
                res = encutils.getEncodingInfo(self._fakeRes(header), text)
            else:
                res = encutils.getEncodingInfo(text=text)
            res = (res.encoding, res.mismatch)
            # Expected values repeat, so name the inputs in the message.
            self.assertEqual(
                exp, res,
                '%r != %r for header %r, text %r' % (exp, res, header, text))
if __name__ == '__main__':
    # Executed as a script: hand control to the unittest command-line runner.
    unittest.main()