Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1from __future__ import absolute_import 

2 

3import datetime 

4import logging 

5import os 

6import re 

7import socket 

8import warnings 

9from socket import error as SocketError 

10from socket import timeout as SocketTimeout 

11 

12from .packages import six 

13from .packages.six.moves.http_client import HTTPConnection as _HTTPConnection 

14from .packages.six.moves.http_client import HTTPException # noqa: F401 

15from .util.proxy import create_proxy_ssl_context 

16 

# Optional SSL support: ``ssl`` is bound to the module when it imports cleanly
# and to ``None`` otherwise, which the rest of this module checks with
# ``if not ssl``.
try:  # Compiled with SSL?
    import ssl

    BaseSSLError = ssl.SSLError
except (ImportError, AttributeError):  # Platform-specific: No SSL.
    ssl = None

    # Stand-in so that ``BaseSSLError`` can always be imported from this module.
    class BaseSSLError(BaseException):
        pass


try:
    # Python 3: not a no-op, we're adding this to the namespace so it can be imported.
    ConnectionError = ConnectionError
except NameError:
    # Python 2
    class ConnectionError(Exception):
        pass


try:  # Python 3:
    # Not a no-op, we're adding this to the namespace so it can be imported.
    BrokenPipeError = BrokenPipeError
except NameError:  # Python 2:

    class BrokenPipeError(Exception):
        pass

44 

45 

46from ._collections import HTTPHeaderDict # noqa (historical, removed in v2) 

47from ._version import __version__ 

48from .exceptions import ( 

49 ConnectTimeoutError, 

50 NewConnectionError, 

51 SubjectAltNameWarning, 

52 SystemTimeWarning, 

53) 

54from .packages.ssl_match_hostname import CertificateError, match_hostname 

55from .util import SKIP_HEADER, SKIPPABLE_HEADERS, connection 

56from .util.ssl_ import ( 

57 assert_fingerprint, 

58 create_urllib3_context, 

59 resolve_cert_reqs, 

60 resolve_ssl_version, 

61 ssl_wrap_socket, 

62) 

63 

# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Default TCP port for each URL scheme handled by the connection classes below.
port_by_scheme = {"http": 80, "https": 443}

# When it comes time to update this value as a part of regular maintenance
# (ie test_recent_date is failing) update it to ~6 months before the current date.
RECENT_DATE = datetime.date(2020, 7, 1)

# Matches any single character outside the allowed set listed in the pattern.
# Despite the name this is broader than control characters: putrequest() uses
# it to reject HTTP methods that contain any "non-token" character.
_CONTAINS_CONTROL_CHAR_RE = re.compile(r"[^-!#$%&'*+.^_`|~0-9a-zA-Z]")

73 

74 

class HTTPConnection(_HTTPConnection, object):
    """
    Based on :class:`http.client.HTTPConnection` but provides an extra constructor
    backwards-compatibility layer between older and newer Pythons.

    Additional keyword parameters are used to configure attributes of the connection.
    Accepted parameters include:

    - ``strict``: See the documentation on :class:`urllib3.connectionpool.HTTPConnectionPool`
    - ``source_address``: Set the source address for the current connection.
    - ``socket_options``: Set specific options on the underlying socket. If not specified, then
      defaults are loaded from ``HTTPConnection.default_socket_options`` which includes disabling
      Nagle's algorithm (sets TCP_NODELAY to 1) unless the connection is behind a proxy.

      For example, if you wish to enable TCP Keep Alive in addition to the defaults,
      you might pass:

      .. code-block:: python

         HTTPConnection.default_socket_options + [
             (socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1),
         ]

      Or you may want to disable the defaults by passing an empty list (e.g., ``[]``).
    - ``proxy`` / ``proxy_config``: Stored on the connection as ``self.proxy`` and
      ``self.proxy_config``; they are not forwarded to the base class.
    """

    default_port = port_by_scheme["http"]

    #: Disable Nagle's algorithm by default.
    #: ``[(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)]``
    default_socket_options = [(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)]

    #: Whether this connection verifies the host's certificate.
    is_verified = False

    def __init__(self, *args, **kw):
        # ``strict`` is dropped on Python 3 before delegating to the base class.
        if not six.PY2:
            kw.pop("strict", None)

        # Pre-set source_address. It is read with ``get`` (not ``pop``) so the
        # base-class constructor still receives it.
        self.source_address = kw.get("source_address")

        #: The socket options provided by the user. If no options are
        #: provided, we use the default options.
        self.socket_options = kw.pop("socket_options", self.default_socket_options)

        # Proxy options provided by the user.
        self.proxy = kw.pop("proxy", None)
        self.proxy_config = kw.pop("proxy_config", None)

        _HTTPConnection.__init__(self, *args, **kw)

    @property
    def host(self):
        """
        Getter method to remove any trailing dots that indicate the hostname is an FQDN.

        In general, SSL certificates don't include the trailing dot indicating a
        fully-qualified domain name, and thus, they don't validate properly when
        checked against a domain name that includes the dot. In addition, some
        servers may not expect to receive the trailing dot when provided.

        However, the hostname with trailing dot is critical to DNS resolution; doing a
        lookup with the trailing dot will properly only resolve the appropriate FQDN,
        whereas a lookup without a trailing dot will search the system's search domain
        list. Thus, it's important to keep the original host around for use only in
        those cases where it's appropriate (i.e., when doing DNS lookup to establish the
        actual TCP connection across which we're going to send HTTP requests).
        """
        return self._dns_host.rstrip(".")

    @host.setter
    def host(self, value):
        """
        Setter for the `host` property.

        We assume that only urllib3 uses the _dns_host attribute; httplib itself
        only uses `host`, and it seems reasonable that other libraries follow suit.
        """
        self._dns_host = value

    def _new_conn(self):
        """Establish a socket connection and set nodelay settings on it.

        :return: New socket connection.
        :raises ConnectTimeoutError: if creating the connection times out.
        :raises NewConnectionError: on any other socket error while connecting.
        """
        # Only forward the optional settings that are actually set.
        extra_kw = {}
        if self.source_address:
            extra_kw["source_address"] = self.source_address

        if self.socket_options:
            extra_kw["socket_options"] = self.socket_options

        try:
            # Connect using the unstripped host (see the ``host`` property).
            conn = connection.create_connection(
                (self._dns_host, self.port), self.timeout, **extra_kw
            )

        except SocketTimeout:
            raise ConnectTimeoutError(
                self,
                "Connection to %s timed out. (connect timeout=%s)"
                % (self.host, self.timeout),
            )

        except SocketError as e:
            raise NewConnectionError(
                self, "Failed to establish a new connection: %s" % e
            )

        return conn

    def _is_using_tunnel(self):
        """Return the tunnel host if one is set, otherwise ``None``."""
        # Google App Engine's httplib does not define _tunnel_host
        return getattr(self, "_tunnel_host", None)

    def _prepare_conn(self, conn):
        """Adopt ``conn`` as this connection's socket and set up any tunnel."""
        self.sock = conn
        if self._is_using_tunnel():
            # TODO: Fix tunnel so it doesn't depend on self.sock state.
            self._tunnel()
            # Mark this connection as not reusable
            self.auto_open = 0

    def connect(self):
        """Open a new socket connection and prepare it for use."""
        conn = self._new_conn()
        self._prepare_conn(conn)

    def putrequest(self, method, url, *args, **kwargs):
        """"""
        # Empty docstring because the indentation of CPython's implementation
        # is broken but we don't want this method in our documentation.
        #
        # Reject methods containing any character outside the allowed token set
        # before handing the request line to the base class.
        match = _CONTAINS_CONTROL_CHAR_RE.search(method)
        if match:
            raise ValueError(
                "Method cannot contain non-token characters %r (found at least %r)"
                % (method, match.group())
            )

        return _HTTPConnection.putrequest(self, method, url, *args, **kwargs)

    def putheader(self, header, *values):
        """"""
        # A value equal to SKIP_HEADER means "do not emit this header at all";
        # that is only accepted for the header names in SKIPPABLE_HEADERS.
        if not any(isinstance(v, str) and v == SKIP_HEADER for v in values):
            _HTTPConnection.putheader(self, header, *values)
        elif six.ensure_str(header.lower()) not in SKIPPABLE_HEADERS:
            raise ValueError(
                "urllib3.util.SKIP_HEADER only supports '%s'"
                % ("', '".join(map(str.title, sorted(SKIPPABLE_HEADERS))),)
            )

    def request(self, method, url, body=None, headers=None):
        """Send a request, adding a default ``User-Agent`` header if none is given.

        The ``headers`` mapping passed by the caller is never modified.
        """
        if headers is None:
            headers = {}
        else:
            # Avoid modifying the headers passed into .request()
            headers = headers.copy()
        # Header names are compared case-insensitively.
        if "user-agent" not in (six.ensure_str(k.lower()) for k in headers):
            headers["User-Agent"] = _get_default_user_agent()
        super(HTTPConnection, self).request(method, url, body=body, headers=headers)

    def request_chunked(self, method, url, body=None, headers=None):
        """
        Alternative to the common request method, which sends the
        body with chunked encoding and not as one block

        ``body`` may be ``None``, a single str/bytes value, or an iterable of
        str/bytes chunks. Empty chunks are skipped and str chunks are encoded
        as UTF-8. The terminating zero-length chunk is always sent.
        """
        headers = headers or {}
        # Lowercased header names, so every presence check below is
        # case-insensitive.
        header_keys = {six.ensure_str(k.lower()) for k in headers}
        skip_accept_encoding = "accept-encoding" in header_keys
        skip_host = "host" in header_keys
        self.putrequest(
            method, url, skip_accept_encoding=skip_accept_encoding, skip_host=skip_host
        )
        if "user-agent" not in header_keys:
            self.putheader("User-Agent", _get_default_user_agent())
        for header, value in headers.items():
            self.putheader(header, value)
        # Check the lowercased names: testing the raw ``headers`` mapping would
        # miss e.g. "Transfer-Encoding" and emit the header a second time.
        if "transfer-encoding" not in header_keys:
            self.putheader("Transfer-Encoding", "chunked")
        self.endheaders()

        if body is not None:
            stringish_types = six.string_types + (bytes,)
            if isinstance(body, stringish_types):
                body = (body,)
            for chunk in body:
                if not chunk:
                    continue
                if not isinstance(chunk, bytes):
                    chunk = chunk.encode("utf8")
                # Chunk framing: hex length, CRLF, payload, CRLF.
                len_str = hex(len(chunk))[2:]
                to_send = bytearray(len_str.encode())
                to_send += b"\r\n"
                to_send += chunk
                to_send += b"\r\n"
                self.send(to_send)

        # After the if clause, to always have a closed body
        self.send(b"0\r\n\r\n")

274 

275 

class HTTPSConnection(HTTPConnection):
    """
    Many of the parameters to this constructor are passed to the underlying SSL
    socket by means of :py:func:`urllib3.util.ssl_wrap_socket`.
    """

    default_port = port_by_scheme["https"]

    # Class-level defaults for the settings that set_cert() assigns, so that
    # connect() can read them even when set_cert() was never called.
    cert_reqs = None
    ca_certs = None
    ca_cert_dir = None
    ca_cert_data = None
    ssl_version = None
    assert_fingerprint = None
    # Without this default, connect() raised AttributeError when it reached
    # the hostname check on a connection that never had set_cert() called.
    assert_hostname = None
    tls_in_tls_required = False

    def __init__(
        self,
        host,
        port=None,
        key_file=None,
        cert_file=None,
        key_password=None,
        strict=None,
        timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
        ssl_context=None,
        server_hostname=None,
        **kw
    ):

        HTTPConnection.__init__(self, host, port, strict=strict, timeout=timeout, **kw)

        self.key_file = key_file
        self.cert_file = cert_file
        self.key_password = key_password
        self.ssl_context = ssl_context
        self.server_hostname = server_hostname

        # Required property for Google AppEngine 1.9.0 which otherwise causes
        # HTTPS requests to go out as HTTP. (See Issue #356)
        self._protocol = "https"

    def set_cert(
        self,
        key_file=None,
        cert_file=None,
        cert_reqs=None,
        key_password=None,
        ca_certs=None,
        assert_hostname=None,
        assert_fingerprint=None,
        ca_cert_dir=None,
        ca_cert_data=None,
    ):
        """
        This method should only be called once, before the connection is used.

        Every argument is stored on the connection, replacing any previous
        value (including the ``key_file``/``cert_file``/``key_password`` given
        to the constructor). ``ca_certs`` and ``ca_cert_dir`` are passed
        through :func:`os.path.expanduser` when they are truthy.
        """
        # If cert_reqs is not provided we'll assume CERT_REQUIRED unless we also
        # have an SSLContext object in which case we'll use its verify_mode.
        if cert_reqs is None:
            if self.ssl_context is not None:
                cert_reqs = self.ssl_context.verify_mode
            else:
                cert_reqs = resolve_cert_reqs(None)

        self.key_file = key_file
        self.cert_file = cert_file
        self.cert_reqs = cert_reqs
        self.key_password = key_password
        self.assert_hostname = assert_hostname
        self.assert_fingerprint = assert_fingerprint
        self.ca_certs = ca_certs and os.path.expanduser(ca_certs)
        self.ca_cert_dir = ca_cert_dir and os.path.expanduser(ca_cert_dir)
        self.ca_cert_data = ca_cert_data

    def connect(self):
        """Open the socket, wrap it in TLS and verify the peer as configured.

        Sets ``self.sock`` to the wrapped socket and ``self.is_verified`` to
        whether the certificate was required or a fingerprint was asserted.
        """
        # Add certificate verification
        conn = self._new_conn()
        hostname = self.host
        tls_in_tls = False

        if self._is_using_tunnel():
            if self.tls_in_tls_required:
                # Establish TLS to the proxy first; the TLS session to the
                # destination is then layered on top of it.
                conn = self._connect_tls_proxy(hostname, conn)
                tls_in_tls = True

            self.sock = conn

            # Calls self._set_hostport(), so self.host is
            # self._tunnel_host below.
            self._tunnel()
            # Mark this connection as not reusable
            self.auto_open = 0

            # Override the host with the one we're requesting data from.
            hostname = self._tunnel_host

        # An explicitly configured server_hostname takes precedence.
        server_hostname = hostname
        if self.server_hostname is not None:
            server_hostname = self.server_hostname

        is_time_off = datetime.date.today() < RECENT_DATE
        if is_time_off:
            warnings.warn(
                (
                    "System time is way off (before {0}). This will probably "
                    "lead to SSL verification errors"
                ).format(RECENT_DATE),
                SystemTimeWarning,
            )

        # Wrap socket using verification with the root certs in
        # trusted_root_certs
        default_ssl_context = False
        if self.ssl_context is None:
            default_ssl_context = True
            self.ssl_context = create_urllib3_context(
                ssl_version=resolve_ssl_version(self.ssl_version),
                cert_reqs=resolve_cert_reqs(self.cert_reqs),
            )

        context = self.ssl_context
        context.verify_mode = resolve_cert_reqs(self.cert_reqs)

        # Try to load OS default certs if none are given.
        # Works well on Windows (requires Python3.4+)
        if (
            not self.ca_certs
            and not self.ca_cert_dir
            and not self.ca_cert_data
            and default_ssl_context
            and hasattr(context, "load_default_certs")
        ):
            context.load_default_certs()

        self.sock = ssl_wrap_socket(
            sock=conn,
            keyfile=self.key_file,
            certfile=self.cert_file,
            key_password=self.key_password,
            ca_certs=self.ca_certs,
            ca_cert_dir=self.ca_cert_dir,
            ca_cert_data=self.ca_cert_data,
            server_hostname=server_hostname,
            ssl_context=context,
            tls_in_tls=tls_in_tls,
        )

        # If we're using all defaults and the connection
        # is TLSv1 or TLSv1.1 we throw a DeprecationWarning
        # for the host.
        if (
            default_ssl_context
            and self.ssl_version is None
            and hasattr(self.sock, "version")
            and self.sock.version() in {"TLSv1", "TLSv1.1"}
        ):
            warnings.warn(
                "Negotiating TLSv1/TLSv1.1 by default is deprecated "
                "and will be disabled in urllib3 v2.0.0. Connecting to "
                "'%s' with '%s' can be enabled by explicitly opting-in "
                "with 'ssl_version'" % (self.host, self.sock.version()),
                DeprecationWarning,
            )

        if self.assert_fingerprint:
            # A pinned fingerprint replaces hostname matching entirely.
            assert_fingerprint(
                self.sock.getpeercert(binary_form=True), self.assert_fingerprint
            )
        elif (
            context.verify_mode != ssl.CERT_NONE
            and not getattr(context, "check_hostname", False)
            and self.assert_hostname is not False
        ):
            # While urllib3 attempts to always turn off hostname matching from
            # the TLS library, this cannot always be done. So we check whether
            # the TLS Library still thinks it's matching hostnames.
            cert = self.sock.getpeercert()
            if not cert.get("subjectAltName", ()):
                warnings.warn(
                    (
                        "Certificate for {0} has no `subjectAltName`, falling back to check for a "
                        "`commonName` for now. This feature is being removed by major browsers and "
                        "deprecated by RFC 2818. (See https://github.com/urllib3/urllib3/issues/497 "
                        "for details.)".format(hostname)
                    ),
                    SubjectAltNameWarning,
                )
            _match_hostname(cert, self.assert_hostname or server_hostname)

        self.is_verified = (
            context.verify_mode == ssl.CERT_REQUIRED
            or self.assert_fingerprint is not None
        )

    def _connect_tls_proxy(self, hostname, conn):
        """
        Establish a TLS connection to the proxy using the provided SSL context.

        :param hostname: Passed to ``ssl_wrap_socket`` as ``server_hostname``.
        :param conn: The already-connected socket to wrap.
        :return: The socket returned by ``ssl_wrap_socket``.
        """
        proxy_config = self.proxy_config
        ssl_context = proxy_config.ssl_context
        if ssl_context:
            # If the user provided a proxy context, we assume CA and client
            # certificates have already been set
            return ssl_wrap_socket(
                sock=conn,
                server_hostname=hostname,
                ssl_context=ssl_context,
            )

        ssl_context = create_proxy_ssl_context(
            self.ssl_version,
            self.cert_reqs,
            self.ca_certs,
            self.ca_cert_dir,
            self.ca_cert_data,
        )
        # By default urllib3's SSLContext disables `check_hostname` and uses
        # a custom check. For proxies we're good with relying on the default
        # verification.
        ssl_context.check_hostname = True

        # If no cert was provided, use only the default options for server
        # certificate validation
        return ssl_wrap_socket(
            sock=conn,
            ca_certs=self.ca_certs,
            ca_cert_dir=self.ca_cert_dir,
            ca_cert_data=self.ca_cert_data,
            server_hostname=hostname,
            ssl_context=ssl_context,
        )

508 

509 

def _match_hostname(cert, asserted_hostname):
    """Check ``cert`` against ``asserted_hostname``, re-raising on mismatch.

    When ``match_hostname`` raises :class:`CertificateError`, the certificate
    is stored on the exception as ``_peer_cert``, a warning is logged, and the
    same exception is re-raised.
    """
    try:
        match_hostname(cert, asserted_hostname)
    except CertificateError as mismatch:
        # Attach the certificate to the exception so that client code catching
        # it can inspect the cert, if they want to.
        mismatch._peer_cert = cert
        log.warning(
            "Certificate did not match expected hostname: %s. Certificate: %s",
            asserted_hostname,
            cert,
        )
        raise

523 

524 

def _get_default_user_agent():
    """Return the ``User-Agent`` value used when the caller supplies none."""
    default_agent = "python-urllib3/%s" % __version__
    return default_agent

527 

528 

class DummyConnection(object):
    """Placeholder class used to detect a failed ConnectionCls import."""

533 

534 

# Without SSL support, rebind HTTPSConnection to the DummyConnection
# placeholder so that the name still exists in this module.
if not ssl:
    HTTPSConnection = DummyConnection  # noqa: F811


# Alias: both names refer to the same class object.
VerifiedHTTPSConnection = HTTPSConnection