1
2
3
4
5 import base64
6 import errno
7 import logging
8 import os
9 import time
10 import socket
11 import traceback
12 import types
13 import urlparse
14
try:
    import ssl  # python 2.6+
    _ssl_wrapper = ssl.wrap_socket
    have_ssl = True
except ImportError:
    if hasattr(socket, "ssl"):
        # Pre-2.6 fallback: adapt the legacy socket.ssl() API so it
        # behaves like ssl.wrap_socket (FakeSocket makes the ssl object
        # file-like again).
        from httplib import FakeSocket
        from .sock import trust_all_certificates

        @trust_all_certificates
        def _ssl_wrapper(sck, **kwargs):
            ssl_sck = socket.ssl(sck, **kwargs)
            return FakeSocket(sck, ssl_sck)
        have_ssl = True
    else:
        have_ssl = False
31
32 from . import __version__
33 from .conn import Connection
34 from .errors import RequestError, RequestTimeout, RedirectLimit, \
35 NoMoreData, ProxyError
36 from .globals import get_manager
37 from . import http
38
39 from .sock import close, send, sendfile, sendlines, send_chunk, \
40 validate_ssl_args
41 from .util import parse_netloc, rewrite_location
42 from .wrappers import Request, Response
43
44
# tuning knobs for connection handling and retries
MAX_CLIENT_TIMEOUT = 300
MAX_CLIENT_CONNECTIONS = 5
MAX_CLIENT_TRIES = 5
CLIENT_WAIT_TRIES = 0.3
MAX_FOLLOW_REDIRECTS = 5
USER_AGENT = "restkit/%s" % __version__

log = logging.getLogger(__name__)
55
56 """ A client handles one connection at a time. A client is threadsafe,
57 but a handle shouldn't be shared between threads. All connections
58 are shared between threads via a pool.
59
60 >>> from restkit import *
61 >>> c = Client()
62 >>> r = c.request("http://google.com")
63 >>> r.status
64 '301 Moved Permanently'
65 >>> r.body_string()
66 '<HTML><HEAD><meta http-equiv="content-type" content="text/html;charset=utf-8">\n<TITLE>301 Moved</TITLE></HEAD><BODY>\n<H1>301 Moved</H1>\nThe document has moved\n<A HREF="http://www.google.com/">here</A>.\r\n</BODY></HTML>\r\n'
67 >>> c.follow_redirect = True
68 >>> r = c.request("http://google.com")
69 >>> r.status
70 '200 OK'
71
72 """
73
74 version = (1, 1)
75 response_class=Response
76
77 - def __init__(self,
78 follow_redirect=False,
79 force_follow_redirect=False,
80 max_follow_redirect=MAX_FOLLOW_REDIRECTS,
81 filters=None,
82 decompress=True,
83 max_status_line_garbage=None,
84 max_header_count=0,
85 manager=None,
86 response_class=None,
87 timeout=None,
88 use_proxy=False,
89 max_tries=5,
90 wait_tries=1.0,
91 **ssl_args):
92 """
93 Client parameters
94 ~~~~~~~~~~~~~~~~~
95
96 :param follow_redirect: follow redirection, by default False
97 :param max_ollow_redirect: number of redirections available
98 :filters: http filters to pass
99 :param decompress: allows the client to decompress the response
100 body
101 :param max_status_line_garbage: defines the maximum number of ignorable
102 lines before we expect a HTTP response's status line. With
103 HTTP/1.1 persistent connections, the problem arises that broken
104 scripts could return a wrong Content-Length (there are more
105 bytes sent than specified). Unfortunately, in some cases, this
106 cannot be detected after the bad response, but only before the
107 next one. So the client is abble to skip bad lines using this
108 limit. 0 disable garbage collection, None means unlimited number
109 of tries.
110 :param max_header_count: determines the maximum HTTP header count
111 allowed. by default no limit.
112 :param manager: the manager to use. By default we use the global
113 one.
114 :parama response_class: the response class to use
115 :param timeout: the default timeout of the connection
116 (SO_TIMEOUT)
117
118 :param max_tries: the number of tries before we give up a
119 connection
120 :param wait_tries: number of time we wait between each tries.
121 :param ssl_args: named argument, see ssl module for more
122 informations
123 """
124 self.follow_redirect = follow_redirect
125 self.force_follow_redirect = force_follow_redirect
126 self.max_follow_redirect = max_follow_redirect
127 self.decompress = decompress
128 self.filters = filters or []
129 self.max_status_line_garbage = max_status_line_garbage
130 self.max_header_count = max_header_count
131 self.use_proxy = use_proxy
132
133 self.request_filters = []
134 self.response_filters = []
135 self.load_filters()
136
137
138
139 if manager is None:
140 manager = get_manager()
141 self._manager = manager
142
143
144 if response_class is not None:
145 self.response_class = response_class
146
147 self.max_tries = max_tries
148 self.wait_tries = wait_tries
149 self.timeout = timeout
150
151 self._nb_redirections = self.max_follow_redirect
152 self._url = None
153 self._initial_url = None
154 self._write_cb = None
155 self._headers = None
156 self._sock_key = None
157 self._sock = None
158 self._original = None
159
160 self.method = 'GET'
161 self.body = None
162 self.ssl_args = ssl_args or {}
163
165 """ Populate filters from self.filters.
166 Must be called each time self.filters is updated.
167 """
168 for f in self.filters:
169 if hasattr(f, "on_request"):
170 self.request_filters.append(f)
171 if hasattr(f, "on_response"):
172 self.response_filters.append(f)
173
175 """ create a socket """
176 if log.isEnabledFor(logging.DEBUG):
177 log.debug("create new connection")
178 for res in socket.getaddrinfo(addr[0], addr[1], 0,
179 socket.SOCK_STREAM):
180 af, socktype, proto, canonname, sa = res
181
182 try:
183 sck = socket.socket(af, socktype, proto)
184
185 if self.timeout is not None:
186 sck.settimeout(self.timeout)
187
188 sck.connect(sa)
189
190 if is_ssl:
191 if not have_ssl:
192 raise ValueError("https isn't supported. On python 2.5x,"
193 + " https support requires ssl module "
194 + "(http://pypi.python.org/pypi/ssl) "
195 + "to be intalled.")
196 validate_ssl_args(self.ssl_args)
197 sck = _ssl_wrapper(sck, **self.ssl_args)
198
199 return sck
200 except socket.error:
201 close(sck)
202 raise socket.error, "getaddrinfo returns an empty list"
203
205 """ get a connection from the pool or create new one. """
206
207 addr = parse_netloc(request.parsed_url)
208 is_ssl = request.is_ssl()
209
210 extra_headers = []
211 sck = None
212 if self.use_proxy:
213 sck, addr, extra_headers = self.proxy_connection(request, addr, ssl)
214 if not sck:
215 sck = self._manager.find_socket(addr, is_ssl)
216 if sck is None:
217 sck = self.connect(addr, is_ssl)
218
219
220 if self.timeout is not None:
221 sck.settimeout(self.timeout)
222
223 connection = Connection(sck, self._manager, addr,
224 ssl=is_ssl, extra_headers=extra_headers)
225 return connection
226
228 """ do the proxy connection """
229 proxy_settings = os.environ.get('%s_proxy' %
230 request.parsed_url.scheme)
231
232 if proxy_settings and proxy_settings is not None:
233 proxy_settings, proxy_auth = _get_proxy_auth(proxy_settings)
234 addr = parse_netloc(urlparse.urlparse(proxy_settings))
235
236 if ssl:
237 if proxy_auth:
238 proxy_auth = 'Proxy-authorization: %s' % proxy_auth
239 proxy_connect = 'CONNECT %s:%s HTTP/1.0\r\n' % req_addr
240
241 user_agent = request.headers.iget('user_agent')
242 if not user_agent:
243 user_agent = "User-Agent: restkit/%s\r\n" % __version__
244
245 proxy_pieces = '%s%s%s\r\n' % (proxy_connect, proxy_auth,
246 user_agent)
247
248 sck = self._manager.find_socket(addr, ssl)
249 if sck is None:
250 self = self.connect(addr, ssl)
251
252 send(sck, proxy_pieces)
253 unreader = http.Unreader(sck)
254 resp = http.Request(unreader)
255 body = resp.body.read()
256 if resp.status_int != 200:
257 raise ProxyError("Tunnel connection failed: %d %s" %
258 (resp.status_int, body))
259
260 return sck, addr, []
261 else:
262 headers = []
263 if proxy_auth:
264 headers = [('Proxy-authorization', proxy_auth)]
265
266 sck = self._manager.find_socket(addr, ssl)
267 if sck is None:
268 sck = self.connect(addr, ssl)
269 return sck, addr, headers
270 return None, req_addr, []
271
273 """ create final header string """
274 headers = request.headers.copy()
275 if extra_headers is not None:
276 for k, v in extra_headers:
277 headers[k] = v
278
279 if not request.body and request.method in ('POST', 'PUT',):
280 headers['Content-Length'] = 0
281
282 if self.version == (1,1):
283 httpver = "HTTP/1.1"
284 else:
285 httpver = "HTTP/1.0"
286
287 ua = headers.iget('user_agent')
288 if not ua:
289 ua = USER_AGENT
290 host = request.host
291
292 accept_encoding = headers.iget('accept-encoding')
293 if not accept_encoding:
294 accept_encoding = 'identity'
295
296 lheaders = [
297 "%s %s %s\r\n" % (request.method, request.path, httpver),
298 "Host: %s\r\n" % host,
299 "User-Agent: %s\r\n" % ua,
300 "Accept-Encoding: %s\r\n" % accept_encoding
301 ]
302
303 lheaders.extend(["%s: %s\r\n" % (k, str(v)) for k, v in \
304 headers.items() if k.lower() not in \
305 ('user-agent', 'host', 'accept-encoding',)])
306 if log.isEnabledFor(logging.DEBUG):
307 log.debug("Send headers: %s" % lheaders)
308 return "%s\r\n" % "".join(lheaders)
309
429
430
431 - def request(self, url, method='GET', body=None, headers=None):
449
450 - def redirect(self, resp, location, request):
451 """ reset request, set new url of request and perform it """
452 if self._nb_redirections <= 0:
453 raise RedirectLimit("Redirection limit is reached")
454
455 if request.initial_url is None:
456 request.initial_url = self.url
457
458
459 if hasattr(resp, "_body"):
460 resp._body.discard()
461 else:
462 resp.body.discard()
463
464
465 location = rewrite_location(request.url, location)
466
467 if log.isEnabledFor(logging.DEBUG):
468 log.debug("Redirect to %s" % location)
469
470
471 request.url = location
472
473 self._nb_redirections -= 1
474
475
476 return self.perform(request)
477
479 """ return final respons, it is only accessible via peform
480 method """
481 if log.isEnabledFor(logging.DEBUG):
482 log.debug("Start to parse response")
483
484 unreader = http.Unreader(connection.socket())
485 while True:
486 resp = http.Request(unreader, decompress=self.decompress,
487 max_status_line_garbage=self.max_status_line_garbage,
488 max_header_count=self.max_header_count)
489 if resp.status_int != 100:
490 break
491 resp.body.discard()
492
493 if log.isEnabledFor(logging.DEBUG):
494 log.debug("Got response: %s" % resp.status)
495 log.debug("headers: [%s]" % resp.headers)
496
497 location = resp.headers.iget('location')
498
499 if self.follow_redirect:
500 if resp.status_int in (301, 302, 307,):
501 if request.method in ('GET', 'HEAD',) or \
502 self.force_follow_redirect:
503 if hasattr(self.body, 'read'):
504 try:
505 self.body.seek(0)
506 except AttributeError:
507 connection.release()
508 raise RequestError("Can't redirect %s to %s "
509 "because body has already been read"
510 % (self.url, location))
511 connection.release()
512 return self.redirect(resp, location, request)
513
514 elif resp.status_int == 303 and self.method == "POST":
515 connection.release()
516 request.method = "GET"
517 request.body = None
518 return self.redirect(resp, location, request)
519
520
521 resp = self.response_class(connection, request, resp)
522
523
524 for f in self.response_filters:
525 f.on_response(resp, request)
526
527 if log.isEnabledFor(logging.DEBUG):
528 log.debug("return response class")
529
530
531 return resp
532
def _get_proxy_auth(proxy_settings):
    """ Extract proxy credentials from the environment
    (proxy-username/proxy_username, proxy-password/proxy_password) or,
    failing that, from the userinfo part of the proxy URL itself.

    Returns ``(proxy_settings, auth)`` where any credentials have been
    stripped from ``proxy_settings`` and ``auth`` is a
    ``'Basic ...\\r\\n'`` value, or ``''`` when there are none.
    """
    proxy_username = os.environ.get('proxy-username')
    if not proxy_username:
        proxy_username = os.environ.get('proxy_username')
    proxy_password = os.environ.get('proxy-password')
    if not proxy_password:
        proxy_password = os.environ.get('proxy_password')

    proxy_password = proxy_password or ""

    if not proxy_username:
        u = urlparse.urlparse(proxy_settings)
        if u.username:
            # without this assignment the URL credentials were parsed
            # but never used, so no auth header was ever produced
            proxy_username = u.username
            proxy_password = u.password or proxy_password
            # strip the userinfo part from the proxy url
            proxy_settings = urlparse.urlunparse((u.scheme,
                u.netloc.split("@")[-1], u.path, u.params, u.query,
                u.fragment))

    if proxy_username:
        user_auth = base64.encodestring('%s:%s' % (proxy_username,
            proxy_password))
        return proxy_settings, 'Basic %s\r\n' % (user_auth.strip())
    else:
        return proxy_settings, ''
558