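# NOTE(review): the lines below appear to be navigation text left over from
# an HTML coverage report, not part of the module. They are kept verbatim as
# comments so they cannot interfere with parsing.
#
# Hide keyboard shortcuts
#
# Hot-keys on this page
#
# r m x p   toggle line displays
#
# j k   next/prev highlighted chunk
#
# 0   (zero) top of page
#
# 1   (one) first highlighted chunk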

1from __future__ import absolute_import 

2 

3import collections 

4import functools 

5import logging 

6 

7from ._collections import RecentlyUsedContainer 

8from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool, port_by_scheme 

9from .exceptions import ( 

10 LocationValueError, 

11 MaxRetryError, 

12 ProxySchemeUnknown, 

13 ProxySchemeUnsupported, 

14 URLSchemeUnknown, 

15) 

16from .packages import six 

17from .packages.six.moves.urllib.parse import urljoin 

18from .request import RequestMethods 

19from .util.proxy import connection_requires_http_tunnel 

20from .util.retry import Retry 

21from .util.url import parse_url 

22 

#: Public names exported by this module.
__all__ = ["PoolManager", "ProxyManager", "proxy_from_url"]


# Module-level logger; used to report followed redirects.
log = logging.getLogger(__name__)

27 

#: Pool keyword arguments that only make sense for TLS connections.
#: ``PoolManager._new_pool`` removes them before building a plain ``http``
#: pool.
SSL_KEYWORDS = (
    "key_file",
    "cert_file",
    "cert_reqs",
    "ca_certs",
    "ssl_version",
    "ca_cert_dir",
    "ssl_context",
    "key_password",
)

38 

# All known keyword arguments that could be provided to the pool manager, its
# pools, or the underlying connections. This is used to construct a pool key.
# Every name carries a ``key_`` prefix because namedtuple fields cannot start
# with an underscore and some of the keyword arguments do (e.g. ``_proxy``).
_key_fields = (
    "key_scheme",  # str
    "key_host",  # str
    "key_port",  # int
    "key_timeout",  # int or float or Timeout
    "key_retries",  # int or Retry
    "key_strict",  # bool
    "key_block",  # bool
    "key_source_address",  # str
    "key_key_file",  # str
    "key_key_password",  # str
    "key_cert_file",  # str
    "key_cert_reqs",  # str
    "key_ca_certs",  # str
    "key_ssl_version",  # str
    "key_ca_cert_dir",  # str
    "key_ssl_context",  # instance of ssl.SSLContext or urllib3.util.ssl_.SSLContext
    "key_maxsize",  # int
    "key_headers",  # dict
    "key__proxy",  # parsed proxy url
    "key__proxy_headers",  # dict
    "key__proxy_config",  # class
    "key_socket_options",  # list of (level (int), optname (int), value (int or str)) tuples
    "key__socks_options",  # dict
    "key_assert_hostname",  # bool or string
    "key_assert_fingerprint",  # str
    "key_server_hostname",  # str
)

#: The namedtuple class used to construct keys for the connection pool.
#: All custom key schemes should include the fields in this key at a minimum.
PoolKey = collections.namedtuple("PoolKey", _key_fields)

73 

# Proxy settings bundled into one value; ``ProxyManager`` builds an instance
# and hands it to its pools through the ``_proxy_config`` keyword argument.
_proxy_config_fields = ("ssl_context", "use_forwarding_for_https")
ProxyConfig = collections.namedtuple("ProxyConfig", _proxy_config_fields)

76 

77 

def _default_key_normalizer(key_class, request_context):
    """
    Create a pool key out of a request context dictionary.

    According to RFC 3986, both the scheme and host are case-insensitive.
    Therefore, this function lower-cases both before constructing the pool
    key. If you wish to change this behaviour, provide alternate callables
    to ``key_fn_by_scheme``.

    :param key_class:
        The class to use when constructing the key. This should be a
        namedtuple with the ``key_scheme`` and ``key_host`` fields at a
        minimum, plus a ``key_<name>`` field for every other ``<name>``
        present in ``request_context``.
    :type key_class: namedtuple
    :param request_context:
        A dictionary-like object that contains the context for a request.
        It must provide ``scheme`` and ``host``; it is not modified.
    :type request_context: dict

    :return: A namedtuple that can be used as a connection pool key.
    :rtype: PoolKey

    :raises KeyError: if ``scheme`` or ``host`` is missing from the context.
    :raises TypeError:
        if the context holds a name that ``key_class`` has no matching
        ``key_``-prefixed field for.
    """
    # Work on a copy so the caller's request context is never mutated.
    context = request_context.copy()
    context["scheme"] = context["scheme"].lower()
    context["host"] = context["host"].lower()

    # These three are dictionaries; freeze them so the key stays hashable.
    for key in ("headers", "_proxy_headers", "_socks_options"):
        if context.get(key) is not None:
            context[key] = frozenset(context[key].items())

    # The socket_options key may be a list and needs to be transformed into a
    # tuple.
    socket_opts = context.get("socket_options")
    if socket_opts is not None:
        context["socket_options"] = tuple(socket_opts)

    # Map the kwargs to the names in the namedtuple - this is necessary since
    # namedtuples can't have fields starting with '_'.
    key_kwargs = dict(("key_" + name, value) for name, value in context.items())

    # Default to ``None`` for fields missing from the context.
    for field in key_class._fields:
        key_kwargs.setdefault(field, None)

    return key_class(**key_kwargs)

125 

126 

#: A dictionary that maps a scheme to a callable that creates a pool key.
#: This can be used to alter the way pool keys are constructed, if desired.
#: Each PoolManager makes a copy of this dictionary so they can be configured
#: globally here, or individually on the instance.
key_fn_by_scheme = {
    "http": functools.partial(_default_key_normalizer, PoolKey),
    "https": functools.partial(_default_key_normalizer, PoolKey),
}

#: Maps a scheme to the connection pool class used for it. PoolManager
#: instances reference this dictionary directly; it is not copied.
pool_classes_by_scheme = {"http": HTTPConnectionPool, "https": HTTPSConnectionPool}

137 

138 

class PoolManager(RequestMethods):
    """
    Allows for arbitrary requests while transparently keeping track of
    necessary connection pools for you.

    :param num_pools:
        Number of connection pools to cache before discarding the least
        recently used pool.

    :param headers:
        Headers to include with all requests, unless other headers are given
        explicitly.

    :param \\**connection_pool_kw:
        Additional parameters are used to create fresh
        :class:`urllib3.connectionpool.ConnectionPool` instances.

    Example::

        >>> manager = PoolManager(num_pools=2)
        >>> r = manager.request('GET', 'http://google.com/')
        >>> r = manager.request('GET', 'http://google.com/mail')
        >>> r = manager.request('GET', 'http://yahoo.com/')
        >>> len(manager.pools)
        2

    """

    # A plain PoolManager talks to hosts directly; ProxyManager overrides
    # both attributes on its instances.
    proxy = None
    proxy_config = None

    # Lower-cased names of the headers that describe a request body. They are
    # dropped together with the body when a 303 response turns the request
    # into a GET.
    _content_specific_headers = frozenset(
        [
            "content-encoding",
            "content-language",
            "content-location",
            "content-type",
            "content-length",
            "digest",
            "last-modified",
        ]
    )

    def __init__(self, num_pools=10, headers=None, **connection_pool_kw):
        RequestMethods.__init__(self, headers)
        self.connection_pool_kw = connection_pool_kw
        # Pools evicted from the container are closed via ``dispose_func``.
        self.pools = RecentlyUsedContainer(num_pools, dispose_func=lambda p: p.close())

        # Locally set the pool classes and keys so other PoolManagers can
        # override them. Note that only ``key_fn_by_scheme`` is copied; the
        # pool-class mapping is the shared module-level dictionary.
        self.pool_classes_by_scheme = pool_classes_by_scheme
        self.key_fn_by_scheme = key_fn_by_scheme.copy()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.clear()
        # Return False to re-raise any potential exceptions
        return False

    @staticmethod
    def _headers_without(headers, unwanted):
        """
        Return a copy of ``headers`` lacking every header whose lower-cased
        name is in ``unwanted``.

        The mapping that was passed in is never modified, so a headers
        dictionary supplied by the caller survives a redirect intact.
        ``None`` is returned unchanged.
        """
        if headers is None:
            return None

        kept = headers.copy()
        for name in list(six.iterkeys(headers)):
            if name.lower() in unwanted:
                kept.pop(name, None)
        return kept

    def _new_pool(self, scheme, host, port, request_context=None):
        """
        Create a new :class:`urllib3.connectionpool.ConnectionPool` based on host, port, scheme, and
        any additional pool keyword arguments.

        If ``request_context`` is provided, it is provided as keyword arguments
        to the pool class used. This method is used to actually create the
        connection pools handed out by :meth:`connection_from_url` and
        companion methods. It is intended to be overridden for customization.

        Note that the ``scheme``, ``host`` and ``port`` entries (and, for
        ``http`` pools, the entries named in ``SSL_KEYWORDS``) are removed
        from the ``request_context`` dictionary that was passed in.
        """
        pool_cls = self.pool_classes_by_scheme[scheme]
        if request_context is None:
            request_context = self.connection_pool_kw.copy()

        # Although the context has everything necessary to create the pool,
        # this function has historically only used the scheme, host, and port
        # in the positional args. When an API change is acceptable these can
        # be removed.
        for key in ("scheme", "host", "port"):
            request_context.pop(key, None)

        # TLS-only options are not passed to a plain HTTP pool.
        if scheme == "http":
            for kw in SSL_KEYWORDS:
                request_context.pop(kw, None)

        return pool_cls(host, port, **request_context)

    def clear(self):
        """
        Empty our store of pools and direct them all to close.

        This will not affect in-flight connections, but they will not be
        re-used after completion.
        """
        self.pools.clear()

    def connection_from_host(self, host, port=None, scheme="http", pool_kwargs=None):
        """
        Get a :class:`urllib3.connectionpool.ConnectionPool` based on the host, port, and scheme.

        If ``port`` isn't given, it will be derived from the ``scheme`` using
        ``urllib3.connectionpool.port_by_scheme``. If ``pool_kwargs`` is
        provided, it is merged with the instance's ``connection_pool_kw``
        variable and used to create the new connection pool, if one is
        needed.

        :raises LocationValueError: if ``host`` is empty or ``None``.
        """

        if not host:
            raise LocationValueError("No host specified.")

        request_context = self._merge_pool_kwargs(pool_kwargs)
        request_context["scheme"] = scheme or "http"
        if not port:
            # Unknown schemes fall back to port 80.
            port = port_by_scheme.get(request_context["scheme"].lower(), 80)
        request_context["port"] = port
        request_context["host"] = host

        return self.connection_from_context(request_context)

    def connection_from_context(self, request_context):
        """
        Get a :class:`urllib3.connectionpool.ConnectionPool` based on the request context.

        ``request_context`` must at least contain the ``scheme`` key and its
        value must be a key in ``key_fn_by_scheme`` instance variable.

        :raises URLSchemeUnknown:
            if no key function is registered for the (lower-cased) scheme.
        """
        scheme = request_context["scheme"].lower()
        pool_key_constructor = self.key_fn_by_scheme.get(scheme)
        if not pool_key_constructor:
            raise URLSchemeUnknown(scheme)
        pool_key = pool_key_constructor(request_context)

        return self.connection_from_pool_key(pool_key, request_context=request_context)

    def connection_from_pool_key(self, pool_key, request_context=None):
        """
        Get a :class:`urllib3.connectionpool.ConnectionPool` based on the provided pool key.

        ``pool_key`` should be a namedtuple that only contains immutable
        objects. At a minimum it must have the ``scheme``, ``host``, and
        ``port`` fields.

        ``request_context`` is only consulted when no pool is cached under
        ``pool_key``; in that case it must be a dictionary providing
        ``scheme``, ``host`` and ``port``.
        """
        # Look-up and creation happen under the container's lock so a pool
        # is created at most once per key.
        with self.pools.lock:
            # If the scheme, host, or port doesn't match existing open
            # connections, open a new ConnectionPool.
            pool = self.pools.get(pool_key)
            if pool:
                return pool

            # Make a fresh ConnectionPool of the desired type
            scheme = request_context["scheme"]
            host = request_context["host"]
            port = request_context["port"]
            pool = self._new_pool(scheme, host, port, request_context=request_context)
            self.pools[pool_key] = pool

        return pool

    def connection_from_url(self, url, pool_kwargs=None):
        """
        Similar to :func:`urllib3.connectionpool.connection_from_url`.

        If ``pool_kwargs`` is not provided and a new pool needs to be
        constructed, ``self.connection_pool_kw`` is used to initialize
        the :class:`urllib3.connectionpool.ConnectionPool`. If ``pool_kwargs``
        is provided, it is used instead. Note that if a new pool does not
        need to be created for the request, the provided ``pool_kwargs`` are
        not used.
        """
        u = parse_url(url)
        return self.connection_from_host(
            u.host, port=u.port, scheme=u.scheme, pool_kwargs=pool_kwargs
        )

    def _merge_pool_kwargs(self, override):
        """
        Merge a dictionary of override values for self.connection_pool_kw.

        This does not modify self.connection_pool_kw and returns a new dict.
        Any keys in the override dictionary with a value of ``None`` are
        removed from the merged dictionary.
        """
        base_pool_kwargs = self.connection_pool_kw.copy()
        if override:
            for key, value in override.items():
                if value is None:
                    # ``None`` means "drop this setting", not "set it to None".
                    base_pool_kwargs.pop(key, None)
                else:
                    base_pool_kwargs[key] = value
        return base_pool_kwargs

    def _proxy_requires_url_absolute_form(self, parsed_url):
        """
        Indicates if the proxy requires the complete destination URL in the
        request.  Normally this is only needed when not using an HTTP CONNECT
        tunnel.
        """
        if self.proxy is None:
            return False

        return not connection_requires_http_tunnel(
            self.proxy, self.proxy_config, parsed_url.scheme
        )

    def _validate_proxy_scheme_url_selection(self, url_scheme):
        """
        Validates that we're not attempting to do TLS in TLS connections on
        Python2 or with unsupported SSL implementations.

        :raises ProxySchemeUnsupported:
            on Python 2, when an ``https`` URL is requested through an
            ``https`` proxy without ``use_forwarding_for_https``.
        """
        if self.proxy is None or url_scheme != "https":
            return

        if self.proxy.scheme != "https":
            return

        if six.PY2 and not self.proxy_config.use_forwarding_for_https:
            raise ProxySchemeUnsupported(
                "Contacting HTTPS destinations through HTTPS proxies "
                "'via CONNECT tunnels' is not supported in Python 2"
            )

    def urlopen(self, method, url, redirect=True, **kw):
        """
        Same as :meth:`urllib3.HTTPConnectionPool.urlopen`
        with custom cross-host redirect logic and only sends the request-uri
        portion of the ``url``.

        The given ``url`` parameter must be absolute, such that an appropriate
        :class:`urllib3.connectionpool.ConnectionPool` can be chosen for it.

        A ``headers`` mapping passed by the caller is never modified; when
        headers have to be removed for a redirect, a copy is altered and used
        for the follow-up request.
        """
        u = parse_url(url)
        self._validate_proxy_scheme_url_selection(u.scheme)

        conn = self.connection_from_host(u.host, port=u.port, scheme=u.scheme)

        # Redirects are followed by this method (possibly to another host),
        # so the pool must neither follow them itself nor reject the URL.
        kw["assert_same_host"] = False
        kw["redirect"] = False

        if "headers" not in kw:
            kw["headers"] = self.headers.copy()

        if self._proxy_requires_url_absolute_form(u):
            response = conn.urlopen(method, url, **kw)
        else:
            response = conn.urlopen(method, u.request_uri, **kw)

        redirect_location = redirect and response.get_redirect_location()
        if not redirect_location:
            return response

        # Support relative URLs for redirecting.
        redirect_location = urljoin(url, redirect_location)

        if response.status == 303:
            # RFC 9110, Section 15.4.4: the redirected request becomes a GET.
            method = "GET"
            # A GET carries no payload, so drop the body and the headers
            # describing it instead of forwarding them to the new location.
            kw["body"] = None
            kw["headers"] = self._headers_without(
                kw["headers"], self._content_specific_headers
            )

        retries = kw.get("retries")
        if not isinstance(retries, Retry):
            retries = Retry.from_int(retries, redirect=redirect)

        # Strip headers marked as unsafe to forward to the redirected location.
        # Check remove_headers_on_redirect to avoid a potential network call within
        # conn.is_same_host() which may use socket.gethostbyname() in the future.
        if retries.remove_headers_on_redirect and not conn.is_same_host(
            redirect_location
        ):
            kw["headers"] = self._headers_without(
                kw["headers"], retries.remove_headers_on_redirect
            )

        try:
            retries = retries.increment(method, url, response=response, _pool=conn)
        except MaxRetryError:
            if retries.raise_on_redirect:
                response.drain_conn()
                raise
            # Redirect budget exhausted but the caller asked not to raise:
            # hand back the redirect response itself.
            return response

        kw["retries"] = retries
        kw["redirect"] = redirect

        log.info("Redirecting %s -> %s", url, redirect_location)

        response.drain_conn()
        return self.urlopen(method, redirect_location, **kw)

418 

419 

class ProxyManager(PoolManager):
    """
    Behaves just like :class:`PoolManager`, but sends all requests through
    the defined proxy, using the CONNECT method for HTTPS URLs.

    :param proxy_url:
        The URL of the proxy to be used.

    :param proxy_headers:
        A dictionary containing headers that will be sent to the proxy. In case
        of HTTP they are being sent with each request, while in the
        HTTPS/CONNECT case they are sent only once. Could be used for proxy
        authentication.

    :param proxy_ssl_context:
        The proxy SSL context is used to establish the TLS connection to the
        proxy when using HTTPS proxies.

    :param use_forwarding_for_https:
        (Defaults to False) If set to True will forward requests to the HTTPS
        proxy to be made on behalf of the client instead of creating a TLS
        tunnel via the CONNECT method. **Enabling this flag means that request
        and response headers and content will be visible from the HTTPS proxy**
        whereas tunneling keeps request and response headers and content
        private.  IP address, target hostname, SNI, and port are always visible
        to an HTTPS proxy even when this flag is disabled.

    Example::

        >>> proxy = urllib3.ProxyManager('http://localhost:3128/')
        >>> r1 = proxy.request('GET', 'http://google.com/')
        >>> r2 = proxy.request('GET', 'http://httpbin.org/')
        >>> len(proxy.pools)
        1
        >>> r3 = proxy.request('GET', 'https://httpbin.org/')
        >>> r4 = proxy.request('GET', 'https://twitter.com/')
        >>> len(proxy.pools)
        3

    """

    def __init__(
        self,
        proxy_url,
        num_pools=10,
        headers=None,
        proxy_headers=None,
        proxy_ssl_context=None,
        use_forwarding_for_https=False,
        **connection_pool_kw
    ):
        # An existing pool may be passed instead of a URL; rebuild the proxy
        # URL from its scheme, host and port.
        if isinstance(proxy_url, HTTPConnectionPool):
            proxy_url = "%s://%s:%i" % (
                proxy_url.scheme,
                proxy_url.host,
                proxy_url.port,
            )
        proxy = parse_url(proxy_url)

        if proxy.scheme not in ("http", "https"):
            raise ProxySchemeUnknown(proxy.scheme)

        # Fill in the scheme's default port when the URL does not name one.
        if not proxy.port:
            port = port_by_scheme.get(proxy.scheme, 80)
            proxy = proxy._replace(port=port)

        self.proxy = proxy
        self.proxy_headers = proxy_headers or {}
        self.proxy_ssl_context = proxy_ssl_context
        self.proxy_config = ProxyConfig(proxy_ssl_context, use_forwarding_for_https)

        # Hand the proxy settings to every pool this manager creates.
        connection_pool_kw["_proxy"] = self.proxy
        connection_pool_kw["_proxy_headers"] = self.proxy_headers
        connection_pool_kw["_proxy_config"] = self.proxy_config

        super(ProxyManager, self).__init__(num_pools, headers, **connection_pool_kw)

    def connection_from_host(self, host, port=None, scheme="http", pool_kwargs=None):
        """
        Get the connection pool to use for ``host``.

        For the ``https`` scheme the pool is looked up for the destination
        host itself; for any other scheme the pool for the proxy's own host,
        port and scheme is returned, whatever ``host`` and ``port`` are.
        """
        if scheme == "https":
            return super(ProxyManager, self).connection_from_host(
                host, port, scheme, pool_kwargs=pool_kwargs
            )

        return super(ProxyManager, self).connection_from_host(
            self.proxy.host, self.proxy.port, self.proxy.scheme, pool_kwargs=pool_kwargs
        )

    def _set_proxy_headers(self, url, headers=None):
        """
        Sets headers needed by proxies: specifically, the Accept and Host
        headers. Only sets headers not provided by the user.

        :return:
            A new dictionary; the ``headers`` argument is not modified.
        """
        headers_ = {"Accept": "*/*"}

        netloc = parse_url(url).netloc
        if netloc:
            headers_["Host"] = netloc

        # User-supplied values win over the defaults set above.
        if headers:
            headers_.update(headers)
        return headers_

    def urlopen(self, method, url, redirect=True, **kw):
        "Same as HTTP(S)ConnectionPool.urlopen, ``url`` must be absolute."
        u = parse_url(url)
        if not connection_requires_http_tunnel(self.proxy, self.proxy_config, u.scheme):
            # For connections using HTTP CONNECT, httplib sets the necessary
            # headers on the CONNECT to the proxy. If we're not using CONNECT,
            # we'll definitely need to set 'Host' at the very least.
            headers = kw.get("headers", self.headers)
            kw["headers"] = self._set_proxy_headers(url, headers)

        return super(ProxyManager, self).urlopen(method, url, redirect=redirect, **kw)

533 

534 

def proxy_from_url(url, **kw):
    """
    Create a :class:`ProxyManager` for the proxy at ``url``.

    :param url:
        The URL of the proxy; passed to :class:`ProxyManager` as
        ``proxy_url``.

    :param \\**kw:
        Any further keyword arguments are forwarded to
        :class:`ProxyManager` unchanged.

    :return: The newly created :class:`ProxyManager`.
    """
    return ProxyManager(proxy_url=url, **kw)