# -*- coding: utf-8 -*-

"""
requests.utils
~~~~~~~~~~~~~~

This module provides utility functions that are used within Requests
that are also useful for external consumption.
"""

import codecs
import contextlib
import io
import os
import re
import socket
import struct
import sys
import tempfile
import warnings
import zipfile
from collections import OrderedDict

from .__version__ import __version__
from . import certs
# to_native_string is unused here, but imported here for backwards compatibility
from ._internal_utils import to_native_string
from .compat import parse_http_list as _parse_list_header
from .compat import (
    quote, urlparse, bytes, str, unquote, getproxies,
    proxy_bypass, urlunparse, basestring, integer_types, is_py3,
    proxy_bypass_environment, getproxies_environment, Mapping)
from .cookies import cookiejar_from_dict
from .structures import CaseInsensitiveDict
from .exceptions import (
    InvalidURL, InvalidHeader, FileModeWarning, UnrewindableBodyError)

NETRC_FILES = ('.netrc', '_netrc')

DEFAULT_CA_BUNDLE_PATH = certs.where()

DEFAULT_PORTS = {'http': 80, 'https': 443}


if sys.platform == 'win32':
    # provide a proxy_bypass version on Windows without DNS lookups

    def proxy_bypass_registry(host):
        try:
            if is_py3:
                import winreg
            else:
                import _winreg as winreg
        except ImportError:
            return False

        try:
            internetSettings = winreg.OpenKey(winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            # ProxyEnable could be REG_SZ or REG_DWORD, normalizing it
            proxyEnable = int(winreg.QueryValueEx(internetSettings,
                                                  'ProxyEnable')[0])
            # ProxyOverride is almost always a string
            proxyOverride = winreg.QueryValueEx(internetSettings,
                                                'ProxyOverride')[0]
        except OSError:
            return False
        if not proxyEnable or not proxyOverride:
            return False

        # make a check value list from the registry entry: replace the
        # '<local>' string by the localhost entry and the corresponding
        # canonical entry.
        proxyOverride = proxyOverride.split(';')
        # now check if we match one of the registry values.
        for test in proxyOverride:
            if test == '<local>':
                if '.' not in host:
                    return True
            test = test.replace(".", r"\.")  # mask dots
            test = test.replace("*", r".*")  # change glob sequence
            test = test.replace("?", r".")   # change glob char
            if re.match(test, host, re.I):
                return True
        return False

    def proxy_bypass(host):  # noqa
        """Return True if the host should be bypassed.

        Checks proxy settings gathered from the environment, if specified,
        or the registry.
        """
        if getproxies_environment():
            return proxy_bypass_environment(host)
        else:
            return proxy_bypass_registry(host)


def dict_to_sequence(d):
    """Returns the key/value pairs of ``d``: a mapping's ``.items()``,
    or ``d`` itself if it has no ``items`` method."""

    if hasattr(d, 'items'):
        d = d.items()

    return d


def super_len(o):
    total_length = None
    current_position = 0

    if hasattr(o, '__len__'):
        total_length = len(o)

    elif hasattr(o, 'len'):
        total_length = o.len

    elif hasattr(o, 'fileno'):
        try:
            fileno = o.fileno()
        except io.UnsupportedOperation:
            pass
        else:
            total_length = os.fstat(fileno).st_size

            # Having used fstat to determine the file length, we need to
            # confirm that this file was opened up in binary mode.
            if 'b' not in o.mode:
                warnings.warn((
                    "Requests has determined the content-length for this "
                    "request using the binary size of the file: however, the "
                    "file has been opened in text mode (i.e. without the 'b' "
                    "flag in the mode). This may lead to an incorrect "
                    "content-length. In Requests 3.0, support will be removed "
                    "for files in text mode."),
                    FileModeWarning
                )

    if hasattr(o, 'tell'):
        try:
            current_position = o.tell()
        except (OSError, IOError):
            # This can happen in some weird situations, such as when the file
            # is actually a special file descriptor like stdin. In this
            # instance, we don't know what the length is, so set it to zero and
            # let requests chunk it instead.
            if total_length is not None:
                current_position = total_length
        else:
            if hasattr(o, 'seek') and total_length is None:
                # StringIO and BytesIO have seek but no usable fileno
                try:
                    # seek to end of file
                    o.seek(0, 2)
                    total_length = o.tell()

                    # seek back to current position to support
                    # partially read file-like objects
                    o.seek(current_position or 0)
                except (OSError, IOError):
                    total_length = 0

    if total_length is None:
        total_length = 0

    return max(0, total_length - current_position)


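# A minimal illustration (editor's sketch, not part of the original module):
# ``super_len`` reports the bytes remaining from the current read position,
# which is what Requests needs for a Content-Length after a partial read.
# ``io.BytesIO`` stands in here for any seekable file-like object:
#
#     >>> import io
#     >>> buf = io.BytesIO(b'hello world')   # 11 bytes total
#     >>> _ = buf.read(6)                    # consume 'hello '
#     >>> super_len(buf)                     # length from position 6 to EOF
#     5
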

def get_netrc_auth(url, raise_errors=False):
    """Returns the Requests tuple auth for a given url from netrc."""

    netrc_file = os.environ.get('NETRC')
    if netrc_file is not None:
        netrc_locations = (netrc_file,)
    else:
        netrc_locations = ('~/{}'.format(f) for f in NETRC_FILES)

    try:
        from netrc import netrc, NetrcParseError

        netrc_path = None

        for f in netrc_locations:
            try:
                loc = os.path.expanduser(f)
            except KeyError:
                # os.path.expanduser can fail when $HOME is undefined and
                # getpwuid fails. See https://bugs.python.org/issue20164 &
                # https://github.com/psf/requests/issues/1846
                return

            if os.path.exists(loc):
                netrc_path = loc
                break

        # Abort early if there isn't one.
        if netrc_path is None:
            return

        ri = urlparse(url)

        # Strip port numbers from netloc. This weird `if...encode` dance is
        # used for Python 3.2, which doesn't support unicode literals.
        splitstr = b':'
        if isinstance(url, str):
            splitstr = splitstr.decode('ascii')
        host = ri.netloc.split(splitstr)[0]

        try:
            _netrc = netrc(netrc_path).authenticators(host)
            if _netrc:
                # Return with login / password
                login_i = (0 if _netrc[0] else 1)
                return (_netrc[login_i], _netrc[2])
        except (NetrcParseError, IOError):
            # If there was a parsing error or a permissions issue reading the file,
            # we'll just skip netrc auth unless explicitly asked to raise errors.
            if raise_errors:
                raise

    # App Engine hackiness.
    except (ImportError, AttributeError):
        pass


def guess_filename(obj):
    """Tries to guess the filename of the given object."""
    name = getattr(obj, 'name', None)
    if (name and isinstance(name, basestring) and name[0] != '<' and
            name[-1] != '>'):
        return os.path.basename(name)


def extract_zipped_paths(path):
    """Replace nonexistent paths that look like they refer to a member of a zip
    archive with the location of an extracted copy of the target, or else
    just return the provided path unchanged.
    """
    if os.path.exists(path):
        # this is already a valid path, no need to do anything further
        return path

    # find the first valid part of the provided path and treat that as a zip archive
    # assume the rest of the path is the name of a member in the archive
    archive, member = os.path.split(path)
    while archive and not os.path.exists(archive):
        archive, prefix = os.path.split(archive)
        member = '/'.join([prefix, member])

    if not zipfile.is_zipfile(archive):
        return path

    zip_file = zipfile.ZipFile(archive)
    if member not in zip_file.namelist():
        return path

    # we have a valid zip archive and a valid member of that archive
    tmp = tempfile.gettempdir()
    extracted_path = os.path.join(tmp, *member.split('/'))
    if not os.path.exists(extracted_path):
        extracted_path = zip_file.extract(member, path=tmp)

    return extracted_path


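# An illustration (editor's sketch, not part of the original module): given a
# hypothetical bundle ``/tmp/certs.zip`` containing ``cacert.pem``, a path
# that dives "into" the archive is resolved to an extracted temporary copy;
# paths that already exist, or that name no real archive, pass through
# unchanged:
#
#     >>> extract_zipped_paths('/tmp/certs.zip/cacert.pem')  # doctest: +SKIP
#     '/tmp/cacert.pem'
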

def from_key_val_list(value):
    """Take an object and test to see if it can be represented as a
    dictionary. If it can be, return an OrderedDict, e.g.,

    ::

        >>> from_key_val_list([('key', 'val')])
        OrderedDict([('key', 'val')])
        >>> from_key_val_list('string')
        Traceback (most recent call last):
        ...
        ValueError: cannot encode objects that are not 2-tuples
        >>> from_key_val_list({'key': 'val'})
        OrderedDict([('key', 'val')])

    :rtype: OrderedDict
    """
    if value is None:
        return None

    if isinstance(value, (str, bytes, bool, int)):
        raise ValueError('cannot encode objects that are not 2-tuples')

    return OrderedDict(value)


def to_key_val_list(value):
    """Take an object and test to see if it can be represented as a
    dictionary. If it can be, return a list of tuples, e.g.,

    ::

        >>> to_key_val_list([('key', 'val')])
        [('key', 'val')]
        >>> to_key_val_list({'key': 'val'})
        [('key', 'val')]
        >>> to_key_val_list('string')
        Traceback (most recent call last):
        ...
        ValueError: cannot encode objects that are not 2-tuples

    :rtype: list
    """
    if value is None:
        return None

    if isinstance(value, (str, bytes, bool, int)):
        raise ValueError('cannot encode objects that are not 2-tuples')

    if isinstance(value, Mapping):
        value = value.items()

    return list(value)


# From mitsuhiko/werkzeug (used with permission).
def parse_list_header(value):
    """Parse lists as described by RFC 2068 Section 2.

    In particular, parse comma-separated lists where the elements of
    the list may include quoted-strings. A quoted-string could
    contain a comma. A non-quoted string could have quotes in the
    middle. Quotes are removed automatically after parsing.

    It basically works like :func:`parse_set_header` just that items
    may appear multiple times and case sensitivity is preserved.

    The return value is a standard :class:`list`:

    >>> parse_list_header('token, "quoted value"')
    ['token', 'quoted value']

    To create a header from the :class:`list` again, use the
    :func:`dump_header` function.

    :param value: a string with a list header.
    :return: :class:`list`
    :rtype: list
    """
    result = []
    for item in _parse_list_header(value):
        if item[:1] == item[-1:] == '"':
            item = unquote_header_value(item[1:-1])
        result.append(item)
    return result


# From mitsuhiko/werkzeug (used with permission).
def parse_dict_header(value):
    """Parse lists of key, value pairs as described by RFC 2068 Section 2 and
    convert them into a python dict:

    >>> d = parse_dict_header('foo="is a fish", bar="as well"')
    >>> type(d) is dict
    True
    >>> sorted(d.items())
    [('bar', 'as well'), ('foo', 'is a fish')]

    If there is no value for a key it will be `None`:

    >>> parse_dict_header('key_without_value')
    {'key_without_value': None}

    To create a header from the :class:`dict` again, use the
    :func:`dump_header` function.

    :param value: a string with a dict header.
    :return: :class:`dict`
    :rtype: dict
    """
    result = {}
    for item in _parse_list_header(value):
        if '=' not in item:
            result[item] = None
            continue
        name, value = item.split('=', 1)
        if value[:1] == value[-1:] == '"':
            value = unquote_header_value(value[1:-1])
        result[name] = value
    return result


# From mitsuhiko/werkzeug (used with permission).
def unquote_header_value(value, is_filename=False):
    r"""Unquotes a header value. (Reversal of :func:`quote_header_value`).
    This does not use the real unquoting but what browsers are actually
    using for quoting.

    :param value: the header value to unquote.
    :rtype: str
    """
    if value and value[0] == value[-1] == '"':
        # this is not the real unquoting, but fixing this so that the
        # RFC is met will result in bugs with internet explorer and
        # probably some other browsers as well. IE for example is
        # uploading files with "C:\foo\bar.txt" as filename
        value = value[1:-1]

        # if this is a filename and the starting characters look like
        # a UNC path, then just return the value without quotes. Using the
        # replace sequence below on a UNC path has the effect of turning
        # the leading double slash into a single slash and then
        # _fix_ie_filename() doesn't work correctly. See #458.
        if not is_filename or value[:2] != '\\\\':
            return value.replace('\\\\', '\\').replace('\\"', '"')
    return value


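# A quick illustration (editor's addition, mirroring the browser-style
# unquoting described above): surrounding quotes are stripped and escaped
# characters collapsed, while an unquoted value passes through untouched:
#
#     >>> unquote_header_value('"attachment.txt"')
#     'attachment.txt'
#     >>> unquote_header_value('token')
#     'token'
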

def dict_from_cookiejar(cj):
    """Returns a key/value dictionary from a CookieJar.

    :param cj: CookieJar object to extract cookies from.
    :rtype: dict
    """

    cookie_dict = {}

    for cookie in cj:
        cookie_dict[cookie.name] = cookie.value

    return cookie_dict


def add_dict_to_cookiejar(cj, cookie_dict):
    """Returns a CookieJar from a key/value dictionary.

    :param cj: CookieJar to insert cookies into.
    :param cookie_dict: Dict of key/values to insert into CookieJar.
    :rtype: CookieJar
    """

    return cookiejar_from_dict(cookie_dict, cj)


def get_encodings_from_content(content):
    """Returns encodings from given content string.

    :param content: bytestring to extract encodings from.
    """
    warnings.warn((
        'In requests 3.0, get_encodings_from_content will be removed. For '
        'more information, please see the discussion on issue #2266. (This'
        ' warning should only appear once.)'),
        DeprecationWarning)

    charset_re = re.compile(r'<meta.*?charset=["\']*(.+?)["\'>]', flags=re.I)
    pragma_re = re.compile(r'<meta.*?content=["\']*;?charset=(.+?)["\'>]', flags=re.I)
    xml_re = re.compile(r'^<\?xml.*?encoding=["\']*(.+?)["\'>]')

    return (charset_re.findall(content) +
            pragma_re.findall(content) +
            xml_re.findall(content))


def _parse_content_type_header(header):
    """Returns content type and parameters from given header

    :param header: string
    :return: tuple containing content type and dictionary of
         parameters
    """

    tokens = header.split(';')
    content_type, params = tokens[0].strip(), tokens[1:]
    params_dict = {}
    items_to_strip = "\"' "

    for param in params:
        param = param.strip()
        if param:
            key, value = param, True
            index_of_equals = param.find("=")
            if index_of_equals != -1:
                key = param[:index_of_equals].strip(items_to_strip)
                value = param[index_of_equals + 1:].strip(items_to_strip)
            params_dict[key.lower()] = value
    return content_type, params_dict


def get_encoding_from_headers(headers):
    """Returns encodings from given HTTP Header Dict.

    :param headers: dictionary to extract encoding from.
    :rtype: str
    """

    content_type = headers.get('content-type')

    if not content_type:
        return None

    content_type, params = _parse_content_type_header(content_type)

    if 'charset' in params:
        return params['charset'].strip("'\"")

    if 'text' in content_type:
        return 'ISO-8859-1'

    if 'application/json' in content_type:
        # Assume UTF-8 based on RFC 4627: https://www.ietf.org/rfc/rfc4627.txt since the charset was unset
        return 'utf-8'


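# A short illustration (editor's addition) of the header-to-charset logic
# above: an explicit charset parameter wins, text/* defaults to ISO-8859-1,
# and JSON is assumed to be UTF-8:
#
#     >>> get_encoding_from_headers({'content-type': 'text/html; charset=UTF-8'})
#     'UTF-8'
#     >>> get_encoding_from_headers({'content-type': 'text/plain'})
#     'ISO-8859-1'
#     >>> get_encoding_from_headers({'content-type': 'application/json'})
#     'utf-8'
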

def stream_decode_response_unicode(iterator, r):
    """Stream decodes an iterator."""

    if r.encoding is None:
        for item in iterator:
            yield item
        return

    decoder = codecs.getincrementaldecoder(r.encoding)(errors='replace')
    for chunk in iterator:
        rv = decoder.decode(chunk)
        if rv:
            yield rv
    rv = decoder.decode(b'', final=True)
    if rv:
        yield rv


def iter_slices(string, slice_length):
    """Iterate over slices of a string."""
    pos = 0
    if slice_length is None or slice_length <= 0:
        slice_length = len(string)
    while pos < len(string):
        yield string[pos:pos + slice_length]
        pos += slice_length


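# For illustration (editor's addition): slicing walks the string in fixed
# strides, with a None or non-positive length yielding the whole string once:
#
#     >>> list(iter_slices('abcdef', 4))
#     ['abcd', 'ef']
#     >>> list(iter_slices('abcdef', None))
#     ['abcdef']
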

def get_unicode_from_response(r):
    """Returns the requested content back in unicode.

    :param r: Response object to get unicode content from.

    Tries:

    1. charset from content-type
    2. fall back and replace all unicode characters

    :rtype: str
    """
    warnings.warn((
        'In requests 3.0, get_unicode_from_response will be removed. For '
        'more information, please see the discussion on issue #2266. (This'
        ' warning should only appear once.)'),
        DeprecationWarning)

    tried_encodings = []

    # Try charset from content-type
    encoding = get_encoding_from_headers(r.headers)

    if encoding:
        try:
            return str(r.content, encoding)
        except UnicodeError:
            tried_encodings.append(encoding)

    # Fall back:
    try:
        return str(r.content, encoding, errors='replace')
    except TypeError:
        return r.content


# The unreserved URI characters (RFC 3986)
UNRESERVED_SET = frozenset(
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" + "0123456789-._~")


def unquote_unreserved(uri):
    """Un-escape any percent-escape sequences in a URI that are unreserved
    characters. This leaves all reserved, illegal and non-ASCII bytes encoded.

    :rtype: str
    """
    parts = uri.split('%')
    for i in range(1, len(parts)):
        h = parts[i][0:2]
        if len(h) == 2 and h.isalnum():
            try:
                c = chr(int(h, 16))
            except ValueError:
                raise InvalidURL("Invalid percent-escape sequence: '%s'" % h)

            if c in UNRESERVED_SET:
                parts[i] = c + parts[i][2:]
            else:
                parts[i] = '%' + parts[i]
        else:
            parts[i] = '%' + parts[i]
    return ''.join(parts)


def requote_uri(uri):
    """Re-quote the given URI.

    This function passes the given URI through an unquote/quote cycle to
    ensure that it is fully and consistently quoted.

    :rtype: str
    """
    safe_with_percent = "!#$%&'()*+,/:;=?@[]~"
    safe_without_percent = "!#$&'()*+,/:;=?@[]~"
    try:
        # Unquote only the unreserved characters
        # Then quote only illegal characters (do not quote reserved,
        # unreserved, or '%')
        return quote(unquote_unreserved(uri), safe=safe_with_percent)
    except InvalidURL:
        # We couldn't unquote the given URI, so let's try quoting it, but
        # there may be unquoted '%'s in the URI. We need to make sure they're
        # properly quoted so they do not cause issues elsewhere.
        return quote(uri, safe=safe_without_percent)


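# For illustration (editor's addition): the unquote/quote cycle normalizes
# needlessly escaped unreserved characters (here %7E, i.e. '~') while making
# sure illegal characters such as raw spaces end up properly encoded:
#
#     >>> requote_uri('http://example.com/%7Euser/path with space')
#     'http://example.com/~user/path%20with%20space'
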

def address_in_network(ip, net):
    """This function allows you to check if an IP belongs to a network subnet

    Example: returns True if ip = 192.168.1.1 and net = 192.168.1.0/24
             returns False if ip = 192.168.1.1 and net = 192.168.100.0/24

    :rtype: bool
    """
    ipaddr = struct.unpack('=L', socket.inet_aton(ip))[0]
    netaddr, bits = net.split('/')
    netmask = struct.unpack('=L', socket.inet_aton(dotted_netmask(int(bits))))[0]
    network = struct.unpack('=L', socket.inet_aton(netaddr))[0] & netmask
    return (ipaddr & netmask) == (network & netmask)


def dotted_netmask(mask):
    """Converts mask from /xx format to xxx.xxx.xxx.xxx

    Example: if mask is 24 function returns 255.255.255.0

    :rtype: str
    """
    bits = 0xffffffff ^ (1 << 32 - mask) - 1
    return socket.inet_ntoa(struct.pack('>I', bits))


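# For illustration (editor's addition): the two helpers above combine to
# answer the CIDR membership questions used by the no_proxy machinery:
#
#     >>> dotted_netmask(24)
#     '255.255.255.0'
#     >>> address_in_network('192.168.1.1', '192.168.1.0/24')
#     True
#     >>> address_in_network('192.168.1.1', '192.168.100.0/24')
#     False
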

def is_ipv4_address(string_ip):
    """
    :rtype: bool
    """
    try:
        socket.inet_aton(string_ip)
    except socket.error:
        return False
    return True


def is_valid_cidr(string_network):
    """
    Very simple check of the cidr format in no_proxy variable.

    :rtype: bool
    """
    if string_network.count('/') == 1:
        try:
            mask = int(string_network.split('/')[1])
        except ValueError:
            return False

        if mask < 1 or mask > 32:
            return False

        try:
            socket.inet_aton(string_network.split('/')[0])
        except socket.error:
            return False
    else:
        return False
    return True


@contextlib.contextmanager
def set_environ(env_name, value):
    """Set the environment variable 'env_name' to 'value'

    Save previous value, yield, and then restore the previous value stored in
    the environment variable 'env_name'.

    If 'value' is None, do nothing"""
    value_changed = value is not None
    if value_changed:
        old_value = os.environ.get(env_name)
        os.environ[env_name] = value
    try:
        yield
    finally:
        if value_changed:
            if old_value is None:
                del os.environ[env_name]
            else:
                os.environ[env_name] = old_value


def should_bypass_proxies(url, no_proxy):
    """
    Returns whether we should bypass proxies or not.

    :rtype: bool
    """
    # Prioritize lowercase environment variables over uppercase
    # to keep a consistent behaviour with other http projects (curl, wget).
    get_proxy = lambda k: os.environ.get(k) or os.environ.get(k.upper())

    # First check whether no_proxy is defined. If it is, check that the URL
    # we're getting isn't in the no_proxy list.
    no_proxy_arg = no_proxy
    if no_proxy is None:
        no_proxy = get_proxy('no_proxy')
    parsed = urlparse(url)

    if parsed.hostname is None:
        # URLs don't always have hostnames, e.g. file:/// urls.
        return True

    if no_proxy:
        # We need to check whether we match here. We need to see if we match
        # the end of the hostname, both with and without the port.
        no_proxy = (
            host for host in no_proxy.replace(' ', '').split(',') if host
        )

        if is_ipv4_address(parsed.hostname):
            for proxy_ip in no_proxy:
                if is_valid_cidr(proxy_ip):
                    if address_in_network(parsed.hostname, proxy_ip):
                        return True
                elif parsed.hostname == proxy_ip:
                    # If the no_proxy entry was defined in plain IP notation
                    # instead of cidr notation and matches the IP of the URL
                    return True
        else:
            host_with_port = parsed.hostname
            if parsed.port:
                host_with_port += ':{}'.format(parsed.port)

            for host in no_proxy:
                if parsed.hostname.endswith(host) or host_with_port.endswith(host):
                    # The URL does match something in no_proxy, so we don't want
                    # to apply the proxies on this URL.
                    return True

    with set_environ('no_proxy', no_proxy_arg):
        # parsed.hostname can be `None` in cases such as a file URI.
        try:
            bypass = proxy_bypass(parsed.hostname)
        except (TypeError, socket.gaierror):
            bypass = False

    if bypass:
        return True

    return False


def get_environ_proxies(url, no_proxy=None):
    """
    Return a dict of environment proxies.

    :rtype: dict
    """
    if should_bypass_proxies(url, no_proxy=no_proxy):
        return {}
    else:
        return getproxies()


def select_proxy(url, proxies):
    """Select a proxy for the url, if applicable.

    :param url: The url of the request
    :param proxies: A dictionary of schemes or schemes and hosts to proxy URLs
    """
    proxies = proxies or {}
    urlparts = urlparse(url)
    if urlparts.hostname is None:
        return proxies.get(urlparts.scheme, proxies.get('all'))

    proxy_keys = [
        urlparts.scheme + '://' + urlparts.hostname,
        urlparts.scheme,
        'all://' + urlparts.hostname,
        'all',
    ]
    proxy = None
    for proxy_key in proxy_keys:
        if proxy_key in proxies:
            proxy = proxies[proxy_key]
            break

    return proxy


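# For illustration (editor's addition): lookup proceeds from the most
# specific key ('scheme://host') down to the least specific ('all'). The
# hosts and proxy addresses below are hypothetical:
#
#     >>> proxies = {'https': 'http://proxy:3128',
#     ...            'https://example.com': 'http://special:8080'}
#     >>> select_proxy('https://example.com/path', proxies)
#     'http://special:8080'
#     >>> select_proxy('https://other.org/', proxies)
#     'http://proxy:3128'
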

def default_user_agent(name="python-requests"):
    """
    Return a string representing the default user agent.

    :rtype: str
    """
    return '%s/%s' % (name, __version__)


def default_headers():
    """
    :rtype: requests.structures.CaseInsensitiveDict
    """
    return CaseInsensitiveDict({
        'User-Agent': default_user_agent(),
        'Accept-Encoding': ', '.join(('gzip', 'deflate')),
        'Accept': '*/*',
        'Connection': 'keep-alive',
    })


def parse_header_links(value):
    """Return a list of parsed link headers.

    i.e. Link: <http://.../front.jpeg>; rel=front; type="image/jpeg",<http://.../back.jpeg>; rel=back;type="image/jpeg"

    :rtype: list
    """

    links = []

    replace_chars = ' \'"'

    value = value.strip(replace_chars)
    if not value:
        return links

    for val in re.split(', *<', value):
        try:
            url, params = val.split(';', 1)
        except ValueError:
            url, params = val, ''

        link = {'url': url.strip('<> \'"')}

        for param in params.split(';'):
            try:
                key, value = param.split('=')
            except ValueError:
                break

            link[key.strip(replace_chars)] = value.strip(replace_chars)

        links.append(link)

    return links


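# For illustration (editor's addition): a typical RFC 5988 Link header is
# split into one dict per link, carrying the URL plus any parameters:
#
#     >>> parse_header_links('<https://example.com/page/2>; rel="next"')
#     [{'url': 'https://example.com/page/2', 'rel': 'next'}]
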

# Null bytes; no need to recreate these on each call to guess_json_utf
_null = '\x00'.encode('ascii')  # encoding to ASCII for Python 3
_null2 = _null * 2
_null3 = _null * 3


def guess_json_utf(data):
    """
    :rtype: str
    """
    # JSON always starts with two ASCII characters, so detection is as
    # easy as counting the nulls and from their location and count
    # determine the encoding. Also detect a BOM, if present.
    sample = data[:4]
    if sample in (codecs.BOM_UTF32_LE, codecs.BOM_UTF32_BE):
        return 'utf-32'     # BOM included
    if sample[:3] == codecs.BOM_UTF8:
        return 'utf-8-sig'  # BOM included, MS style (discouraged)
    if sample[:2] in (codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE):
        return 'utf-16'     # BOM included
    nullcount = sample.count(_null)
    if nullcount == 0:
        return 'utf-8'
    if nullcount == 2:
        if sample[::2] == _null2:   # 1st and 3rd are null
            return 'utf-16-be'
        if sample[1::2] == _null2:  # 2nd and 4th are null
            return 'utf-16-le'
        # Did not detect 2 valid UTF-16 ascii-range characters
    if nullcount == 3:
        if sample[:3] == _null3:
            return 'utf-32-be'
        if sample[1:] == _null3:
            return 'utf-32-le'
        # Did not detect a valid UTF-32 ascii-range character
    return None


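# For illustration (editor's addition): the first four bytes of the same
# JSON text encoded in each candidate encoding produce a distinctive null
# pattern, which is all the detection above relies on:
#
#     >>> guess_json_utf('{"a"'.encode('utf-8'))
#     'utf-8'
#     >>> guess_json_utf('{"a"'.encode('utf-16-le'))
#     'utf-16-le'
#     >>> guess_json_utf('{"a"'.encode('utf-32-be'))
#     'utf-32-be'
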

def prepend_scheme_if_needed(url, new_scheme):
    """Given a URL that may or may not have a scheme, prepend the given scheme.
    Does not replace a present scheme with the one provided as an argument.

    :rtype: str
    """
    scheme, netloc, path, params, query, fragment = urlparse(url, new_scheme)

    # urlparse is a finicky beast, and sometimes decides that there isn't a
    # netloc present. Assume that it's being over-cautious, and switch netloc
    # and path if urlparse decided there was no netloc.
    if not netloc:
        netloc, path = path, netloc

    return urlunparse((scheme, netloc, path, params, query, fragment))


def get_auth_from_url(url):
    """Given a url with authentication components, extract them into a tuple
    of username, password.

    :rtype: (str,str)
    """
    parsed = urlparse(url)

    try:
        auth = (unquote(parsed.username), unquote(parsed.password))
    except (AttributeError, TypeError):
        auth = ('', '')

    return auth


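# For illustration (editor's addition): credentials embedded in the URL are
# percent-decoded, and URLs without credentials yield empty strings:
#
#     >>> get_auth_from_url('https://user:p%40ss@example.com/')
#     ('user', 'p@ss')
#     >>> get_auth_from_url('https://example.com/')
#     ('', '')
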

# Moved outside of function to avoid recompile every call
_CLEAN_HEADER_REGEX_BYTE = re.compile(b'^\\S[^\\r\\n]*$|^$')
_CLEAN_HEADER_REGEX_STR = re.compile(r'^\S[^\r\n]*$|^$')


def check_header_validity(header):
    """Verifies that header value is a string which doesn't contain
    leading whitespace or return characters. This prevents unintended
    header injection.

    :param header: tuple, in the format (name, value).
    """
    name, value = header

    if isinstance(value, bytes):
        pat = _CLEAN_HEADER_REGEX_BYTE
    else:
        pat = _CLEAN_HEADER_REGEX_STR
    try:
        if not pat.match(value):
            raise InvalidHeader("Invalid return character or leading space in header: %s" % name)
    except TypeError:
        raise InvalidHeader("Value for header {%s: %s} must be of type str or "
                            "bytes, not %s" % (name, value, type(value)))


def urldefragauth(url):
    """
    Given a url remove the fragment and the authentication part.

    :rtype: str
    """
    scheme, netloc, path, params, query, fragment = urlparse(url)

    # see func:`prepend_scheme_if_needed`
    if not netloc:
        netloc, path = path, netloc

    netloc = netloc.rsplit('@', 1)[-1]

    return urlunparse((scheme, netloc, path, params, query, ''))


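# For illustration (editor's addition): both the user:password part and the
# fragment are dropped, leaving a URL that is safe to log or compare:
#
#     >>> urldefragauth('https://user:pass@example.com/path#section')
#     'https://example.com/path'
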

def rewind_body(prepared_request):
    """Move file pointer back to its recorded starting position
    so it can be read again on redirect.
    """
    body_seek = getattr(prepared_request.body, 'seek', None)
    if body_seek is not None and isinstance(prepared_request._body_position, integer_types):
        try:
            body_seek(prepared_request._body_position)
        except (IOError, OSError):
            raise UnrewindableBodyError("An error occurred when rewinding request "
                                        "body for redirect.")
    else:
        raise UnrewindableBodyError("Unable to rewind request body for redirect.")