1
2
3
4
5
6 import os
7 import re
8 import sys
9 import urlparse
10 import zlib
11
12 try:
13 from cStringIO import StringIO
14 except ImportError:
15 from StringIO import StringIO
16
17 from .datastructures import MultiDict
18 from .errors import NoMoreData, ChunkMissingTerminator, \
19 InvalidChunkSize, InvalidRequestLine, InvalidHTTPVersion, \
20 InvalidHTTPStatus, InvalidHeader, InvalidHeaderName, HeaderLimit
21
22
def __init__(self, sock, max_chunk=8192):
    """Set up a push-back capable reader around *sock*.

    sock      -- object the raw data is received from.
    max_chunk -- stored as ``self.max_chunk``; the size used for one
                 low-level receive.

    ``self.buf`` starts out empty and holds data that has been read (or
    pushed back) but not handed to a caller yet.
    """
    self.sock, self.max_chunk = sock, max_chunk
    self.buf = StringIO()
28
30 return self.sock.recv(self.max_chunk)
31
def read(self, size=None):
    """Return buffered and/or freshly fetched data.

    size is ``None`` or negative:
        return the whole pending buffer if it is non-empty, otherwise the
        result of a single ``self._data()`` call.
    size is positive:
        call ``self._data()`` until at least ``size`` bytes are buffered or
        an empty chunk signals the end; return at most ``size`` bytes and
        keep any surplus buffered for the next call.
    size is 0:
        return ``""``.

    Raises TypeError if ``size`` is neither ``None`` nor an int/long.
    """
    if size is not None and not isinstance(size, (int, long)):
        raise TypeError("size parameter must be an int or long.")
    if size == 0:
        return ""
    unbounded = size is None or size < 0

    # Move to the end so tell() is the number of buffered bytes and any
    # write() appends.
    self.buf.seek(0, os.SEEK_END)

    if unbounded:
        if not self.buf.tell():
            return self._data()
        pending = self.buf.getvalue()
        self.buf = StringIO()
        return pending

    while self.buf.tell() < size:
        chunk = self._data()
        if not len(chunk):
            # Source exhausted: fewer than `size` bytes are buffered, so
            # the slicing below hands all of them back.
            break
        self.buf.write(chunk)

    collected = self.buf.getvalue()
    self.buf = StringIO()
    self.buf.write(collected[size:])
    return collected[:size]
61
63 self.buf.seek(0, os.SEEK_END)
64 self.buf.write(data)
65
68 self.unreader = unreader
69 self.req = req
70 self.parser = self.parse_chunked(unreader)
71 self.buf = StringIO()
72
def read(self, size):
    """Return up to *size* bytes of body data.

    Data is pulled from ``self.parser`` (an iterator of data pieces) into
    ``self.buf`` until ``size`` bytes are buffered or the iterator is
    exhausted; once exhausted ``self.parser`` is set to ``None`` and only
    buffered data is served.

    size -- non-negative int/long.  ``0`` returns ``""``, which matches the
            behaviour of the other readers' ``read`` methods.

    Raises TypeError for a non-integer size and ValueError for a negative
    one.
    """
    if not isinstance(size, (int, long)):
        raise TypeError("size must be an integral type")
    # Only negative sizes are errors; the previous "<= 0" test made the
    # size == 0 case below unreachable.
    if size < 0:
        raise ValueError("Size must be positive.")
    if size == 0:
        return ""

    if self.parser:
        while self.buf.tell() < size:
            try:
                self.buf.write(next(self.parser))
            except StopIteration:
                self.parser = None
                break

    data = self.buf.getvalue()
    ret, rest = data[:size], data[size:]
    # Start a fresh buffer holding only the unread remainder; its write
    # position ends up at the end, which the tell() check above relies on.
    self.buf = StringIO()
    self.buf.write(rest)
    return ret
94
96 buf = StringIO()
97 buf.write(data)
98
99 idx = buf.getvalue().find("\r\n\r\n")
100 done = buf.getvalue()[:2] == "\r\n"
101
102 while idx < 0 and not done:
103 self.get_data(unreader, buf)
104 idx = buf.getvalue().find("\r\n\r\n")
105 done = buf.getvalue()[:2] == "\r\n"
106 if done:
107 unreader.unread(buf.getvalue()[2:])
108 return ""
109 self.req.trailers = self.req.parse_headers(buf.getvalue()[:idx])
110 unreader.unread(buf.getvalue()[idx+4:])
111
113 (size, rest) = self.parse_chunk_size(unreader)
114 while size > 0:
115 while size > len(rest):
116 size -= len(rest)
117 yield rest
118 rest = unreader.read()
119 if not rest:
120 raise NoMoreData()
121 yield rest[:size]
122
123 rest = rest[size:]
124 while len(rest) < 2:
125 rest += unreader.read()
126 if rest[:2] != '\r\n':
127 raise ChunkMissingTerminator(rest[:2])
128 (size, rest) = self.parse_chunk_size(unreader, data=rest[2:])
129
131 buf = StringIO()
132 if data is not None:
133 buf.write(data)
134
135 idx = buf.getvalue().find("\r\n")
136 while idx < 0:
137 self.get_data(unreader, buf)
138 idx = buf.getvalue().find("\r\n")
139
140 data = buf.getvalue()
141 line, rest_chunk = data[:idx], data[idx+2:]
142
143 chunk_size = line.split(";", 1)[0].strip()
144 try:
145 chunk_size = int(chunk_size, 16)
146 except ValueError:
147 raise InvalidChunkSize(chunk_size)
148
149 if chunk_size == 0:
150 try:
151 self.parse_trailers(unreader, rest_chunk)
152 except NoMoreData:
153 pass
154 return (0, None)
155 return (chunk_size, rest_chunk)
156
158 data = unreader.read()
159 if not data:
160 raise NoMoreData()
161 buf.write(data)
162
163
def __init__(self, req, unreader, length):
    """Remember the owning message, the data source and the byte budget.

    req      -- stored as ``self.req``.
    unreader -- object providing ``read()`` / ``unread()``.
    length   -- number of body bytes still to be delivered; decremented by
                ``read``.
    """
    self.req, self.unreader, self.length = req, unreader, length
169
def read(self, size):
    """Return up to *size* bytes, never exceeding the remaining length.

    The request is capped at ``self.length``.  Chunks are fetched from
    ``self.unreader.read()`` until enough bytes have arrived or an empty
    chunk is returned; anything beyond the capped size is handed back via
    ``self.unreader.unread()``.  ``self.length`` is reduced by the capped
    size.

    Raises TypeError for a non-integer size and ValueError when the capped
    size is negative.
    """
    if not isinstance(size, (int, long)):
        raise TypeError("size must be an integral type")

    wanted = min(self.length, size)
    if wanted < 0:
        raise ValueError("Size must be positive.")
    if wanted == 0:
        return ""

    pieces = []
    received = 0
    while True:
        piece = self.unreader.read()
        if not piece:
            break
        pieces.append(piece)
        received += len(piece)
        if received >= wanted:
            break

    payload = "".join(pieces)
    # Give back whatever was fetched past the requested amount.
    self.unreader.unread(payload[wanted:])
    self.length -= wanted
    return payload[:wanted]
194
197 self.req = req
198 self.unreader = unreader
199 self.buf = StringIO()
200 self.finished = False
201
def read(self, size):
    """Return up to *size* bytes, reading the source until it runs dry.

    While ``self.finished`` is false, chunks from ``self.unreader.read()``
    are appended to ``self.buf`` until more than ``size`` bytes are
    buffered or an empty chunk arrives; an empty chunk sets
    ``self.finished``.  Once finished, only buffered data is served.

    Raises TypeError for a non-integer size and ValueError for a negative
    one; a size of 0 returns ``""``.
    """
    if not isinstance(size, (int, long)):
        raise TypeError("size must be an integral type")
    if size < 0:
        raise ValueError("Size must be positive.")
    if size == 0:
        return ""

    if not self.finished:
        chunk = self.unreader.read()
        while chunk:
            self.buf.write(chunk)
            if self.buf.tell() > size:
                break
            chunk = self.unreader.read()
        if not chunk:
            # An empty chunk means the source has nothing more to give.
            self.finished = True

    buffered = self.buf.getvalue()
    # Replace the buffer with one holding only the unread remainder.
    self.buf = StringIO()
    self.buf.write(buffered[size:])
    return buffered[:size]
232
def __init__(self, reader):
    """Create a file-like body on top of *reader*.

    reader -- object whose ``read(n)`` supplies the raw body data.

    ``self.buf`` collects data fetched from the reader but not returned
    yet; ``self.closed`` is set once the reader reports no more data.
    """
    self.closed = False
    self.buf = StringIO()
    self.reader = reader
238
239 - def __iter__(self):
241
243 ret = self.readline()
244 if not ret:
245 raise StopIteration()
246 return ret
247
249 data = self.read(8192)
250 while data:
251 data = self.read()
252
def getsize(self, size):
    """Normalise a caller supplied size argument.

    ``None`` and negative values mean "no limit" and are mapped to
    ``sys.maxint``; any other integer is returned unchanged.

    Raises TypeError when ``size`` is neither ``None`` nor an int/long.
    """
    if size is not None:
        if not isinstance(size, (int, long)):
            raise TypeError("size must be an integral type")
        if size >= 0:
            return size
    return sys.maxint
261
def read(self, size=None):
    """Return up to *size* bytes of the body.

    ``size`` is normalised with ``self.getsize``.  The buffer is topped up
    from ``self.reader.read(1024)`` until it holds at least that many
    bytes or the reader returns nothing, in which case ``self.closed`` is
    set.  Bytes beyond the requested amount stay buffered.
    """
    limit = self.getsize(size)
    if limit == 0:
        return ""

    # Nothing is fetched when the buffer already covers the request.
    while self.buf.tell() < limit:
        chunk = self.reader.read(1024)
        if not len(chunk):
            self.closed = True
            break
        self.buf.write(chunk)

    buffered = self.buf.getvalue()
    self.buf = StringIO()
    self.buf.write(buffered[limit:])
    return buffered[:limit]
286
def readline(self, size=None):
    """Return the next line of the body, newline included when present.

    size -- maximum number of bytes to return; ``None`` or a negative
            value means no limit (see ``getsize``).  ``0`` returns ``""``.

    Buffered data is served first.  The limit is honoured for buffered
    data too: when no newline occurs within the first ``size`` buffered
    bytes, exactly ``size`` bytes are returned and the rest stays buffered.
    If the buffer cannot satisfy the request, further data is read from
    ``self.reader`` one byte at a time (so nothing past the newline is
    consumed) until a newline, the limit, or the end of the data;
    ``self.closed`` is set in the last case.
    """
    size = self.getsize(size)
    if size == 0:
        return ""

    pending = self.buf.getvalue()
    # Search only inside the window the caller asked for; a newline that
    # lies beyond it must not make the returned line longer than `size`.
    idx = pending.find("\n", 0, size)
    if idx >= 0 or len(pending) >= size:
        end = idx + 1 if idx >= 0 else size
        self.buf = StringIO()
        self.buf.write(pending[end:])
        return pending[:end]

    # Everything buffered becomes the start of the line.
    self.buf = StringIO()
    parts = [pending]
    length = len(pending)
    ch = ""
    while length < size and ch != "\n":
        ch = self.reader.read(1)
        if not len(ch):
            self.closed = True
            break
        length += 1
        parts.append(ch)
    return "".join(parts)
312
def readlines(self, size=None):
    """Read the remaining body and return it as a list of lines.

    The whole body is fetched with ``self.read()``.  Lines are split on
    ``"\\n"`` only and keep their terminator; a final piece without a
    newline is returned as the last element.  ``size`` is accepted but not
    used.
    """
    data = self.read()
    lines = []
    start = 0
    total = len(data)
    while start < total:
        newline = data.find("\n", start)
        if newline < 0:
            # Unterminated tail.
            lines.append(data[start:])
            break
        lines.append(data[start:newline + 1])
        start = newline + 1
    return lines
325
326
class GzipBody(Body):
    """Body that transparently decompresses gzip encoded data.

    ``self.buf`` always holds *decompressed* data, so ``read`` and
    ``readline`` can be mixed freely and ``size`` always counts
    decompressed bytes.  (Previously ``read`` buffered compressed bytes
    while ``readline`` buffered decompressed ones in the same buffer.)
    """

    def __init__(self, reader):
        super(GzipBody, self).__init__(reader)
        # 16 + MAX_WBITS makes zlib expect a gzip header and trailer.
        self._d = zlib.decompressobj(16 + zlib.MAX_WBITS)

    def _decompress(self, data):
        """Feed compressed *data* to the decompressor and return its output."""
        return self._d.decompress(data)

    def _fill(self):
        """Read one compressed chunk and buffer its decompressed form.

        Returns False (and sets ``self.closed``) when the reader has no
        more data, True otherwise.
        """
        chunk = self.reader.read(1024)
        if not len(chunk):
            self.closed = True
            return False
        self.buf.write(self._decompress(chunk))
        return True

    def _take(self, count):
        """Remove and return the first *count* buffered bytes."""
        data = self.buf.getvalue()
        self.buf.truncate(0)
        self.buf.write(data[count:])
        return data[:count]

    def read(self, size=None):
        """Return up to *size* decompressed bytes (``None``/negative: all)."""
        size = self.getsize(size)
        if size == 0:
            return ""

        while size > self.buf.tell():
            if not self._fill():
                break
        return self._take(size)

    def readline(self, size=None):
        """Return the next decompressed line, at most *size* bytes long.

        The line includes its trailing newline when one was found within
        the limit.
        """
        size = self.getsize(size)
        if size == 0:
            return ""

        idx = self.buf.getvalue().find("\n")
        # Keep decompressing until a newline shows up, the limit is
        # already covered by buffered data, or the input ends.
        while idx < 0 and self.buf.tell() < size:
            if not self._fill():
                break
            idx = self.buf.getvalue().find("\n")

        if idx < 0:
            rlen = self.buf.tell()
        else:
            rlen = idx + 1
        return self._take(min(size, rlen))
391
392
class DeflateBody(GzipBody):
    """Body for "deflate" content; differs from GzipBody only in its decompressor."""

    def __init__(self, reader):
        super(DeflateBody, self).__init__(reader)
        # Replace the decompressor created by the parent with one using
        # zlib's default window setting (MAX_WBITS).
        self._d = zlib.decompressobj(zlib.MAX_WBITS)
397
398
def __init__(self, unreader, decompress=True,
             max_status_line_garbage=None,
             max_header_count=0):
    """Parse the status line and headers from *unreader*, then set up the body.

    unreader                -- source object providing ``read()`` and
                               ``unread()``.
    decompress              -- when true, ``set_body_reader`` wraps gzip or
                               deflate encoded bodies in a decompressing
                               body object.
    max_status_line_garbage -- number of unparsable lines tolerated while
                               looking for the status line; ``None`` means
                               ``sys.maxint``.
    max_header_count        -- stored as ``self.max_header_count``; a
                               false value (the default 0) disables the
                               header limit.

    Data read past the end of the headers is pushed back to the unreader
    before the body reader is chosen.
    """
    self.unreader = unreader
    self.version = None
    self.headers = MultiDict()
    self.trailers = []
    self.body = None
    self.encoding = None
    self.status = None
    self.reason = None
    self.status_int = None
    self.decompress = decompress

    if max_status_line_garbage is None:
        max_status_line_garbage = sys.maxint
    self.max_status_line_garbage = max_status_line_garbage

    self.max_header_count = max_header_count

    # Raw strings keep the regex escapes intact.  The dot between the
    # version numbers is escaped so that only a literal "." is accepted
    # (it previously matched any character, e.g. "HTTP/1x1").
    self.versre = re.compile(r"HTTP/(\d+)\.(\d+)")
    self.stare = re.compile(r"(\d{3})\s*(\w*)")
    # Characters that may not appear in a header name.
    self.hdrre = re.compile("[\x00-\x1F\x7F()<>@,;:\[\]={} \t\\\\\"]")

    unused = self.parse(self.unreader)
    self.unreader.unread(unused)
    self.set_body_reader()
427
def get_data(self, unreader, buf, stop=False):
    """Append the next chunk from *unreader* to *buf*.

    When the unreader returns no data, StopIteration is raised if ``stop``
    is true; otherwise NoMoreData is raised, carrying what has been
    buffered so far.
    """
    chunk = unreader.read()
    if chunk:
        buf.write(chunk)
        return
    if stop:
        raise StopIteration()
    raise NoMoreData(buf.getvalue())
435
def parse(self, unreader):
    """Read and parse the status line and the header block.

    Lines that ``parse_first_line`` rejects are skipped; once more than
    ``self.max_status_line_garbage`` rejected lines have been seen,
    InvalidRequestLine is raised.  The header block is handed to
    ``parse_headers`` and stored in ``self.headers``.

    Returns the data that was read past the end of the headers.  When the
    status line is immediately followed by an empty line, the remaining
    data is pushed back to ``self.unreader`` and ``""`` is returned.

    Raises StopIteration if the very first read yields no data, and
    whatever ``get_data`` raises when the data ends later on.
    """
    buf = StringIO()

    self.get_data(unreader, buf, stop=True)

    # Look for the status line, dropping one rejected line per pass.
    tries = 0
    while True:
        idx = buf.getvalue().find("\r\n")
        while idx < 0:
            self.get_data(unreader, buf)
            idx = buf.getvalue().find("\r\n")

        try:
            self.parse_first_line(buf.getvalue()[:idx])
            break
        except (InvalidRequestLine, InvalidHTTPVersion,
                InvalidHTTPStatus) as e:
            if tries > self.max_status_line_garbage:
                raise InvalidRequestLine("Status line not found %s"
                                         % str(e))
        finally:
            # Runs on success (before the break takes effect) and on
            # failure alike: drop the line that was just examined.
            rest = buf.getvalue()[idx + 2:]
            buf = StringIO()
            buf.write(rest)

        tries += 1

    # Collect data until the blank line that ends the headers.
    idx = buf.getvalue().find("\r\n\r\n")
    done = buf.getvalue()[:2] == "\r\n"
    while idx < 0 and not done:
        self.get_data(unreader, buf)
        idx = buf.getvalue().find("\r\n\r\n")
        done = buf.getvalue()[:2] == "\r\n"
    if done:
        # No headers at all.
        self.unreader.unread(buf.getvalue()[2:])
        return ""

    self.headers = self.parse_headers(buf.getvalue()[:idx])

    return buf.getvalue()[idx + 4:]
487
489 bits = line.split(None, 1)
490 if len(bits) != 2:
491 raise InvalidRequestLine(line)
492
493
494 matchv = self.versre.match(bits[0])
495 if matchv is None:
496 raise InvalidHTTPVersion(bits[0])
497 self.version = (int(matchv.group(1)), int(matchv.group(2)))
498
499
500 matchs = self.stare.match(bits[1])
501 if matchs is None:
502 raise InvalidHTTPStatus(bits[1])
503
504 self.status = bits[1]
505 self.status_int = int(matchs.group(1))
506 self.reason = matchs.group(2)
507
509 headers = MultiDict()
510
511
512 lines = [line + "\r\n" for line in data.split("\r\n")]
513
514
515
516 hdr_count = 0
517 while len(lines):
518 if self.max_header_count and \
519 hdr_count > self.max_header_count:
520
521 raise HeaderLimit(self.max_header_count)
522
523
524 curr = lines.pop(0)
525 if curr.find(":") < 0:
526 raise InvalidHeader(curr.strip())
527 name, value = curr.split(":", 1)
528 name = name.rstrip(" \t")
529 if self.hdrre.search(name.upper()):
530 raise InvalidHeaderName(name)
531 name, value = name.strip(), [value.lstrip()]
532
533
534 while len(lines) and lines[0].startswith((" ", "\t")):
535 value.append(lines.pop(0))
536 value = ''.join(value).rstrip()
537
538 headers.add(name, value)
539 hdr_count += 1
540
541 return headers
542
def set_body_reader(self):
    """Pick the body reader from the framing headers and wrap it.

    Framing:
      * ``Transfer-Encoding: chunked``  -> ChunkedReader
      * a parsable ``Content-Length``   -> LengthReader
      * otherwise                       -> EOFReader

    Chunked transfer encoding takes precedence over Content-Length when
    both headers are present, as the HTTP specification requires.

    The lower-cased ``Content-Encoding`` value is stored in
    ``self.encoding``.  When ``self.decompress`` is true and the encoding
    is gzip or deflate, ``self.body`` becomes a GzipBody / DeflateBody;
    otherwise a plain Body.
    """
    clen = self.headers.iget('content-length')
    te = self.headers.iget('transfer-encoding')
    encoding = self.headers.iget('content-encoding')

    chunked = te is not None and te.lower().strip() == "chunked"

    clength = None
    if not chunked and clen is not None:
        try:
            clength = int(clen)
        except ValueError:
            # Unparsable length: fall through to reading until EOF.
            pass

    if encoding:
        self.encoding = encoding.lower()

    if chunked:
        reader = ChunkedReader(self, self.unreader)
    elif clength is not None:
        reader = LengthReader(self, self.unreader, clength)
    else:
        reader = EOFReader(self, self.unreader)

    if self.decompress and self.encoding == "gzip":
        self.body = GzipBody(reader)
    elif self.decompress and self.encoding == "deflate":
        self.body = DeflateBody(reader)
    else:
        self.body = Body(reader)
575
577 connection = self.headers.iget("connection")
578
579 if connection is not None:
580 if connection.lower().strip() == "close":
581 return True
582 elif connection.lower().strip() == "keep-alive":
583 return False
584 return self.version <= (1, 0)
585