Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1""" 

2Printing tools. 

3""" 

4 

5import sys 

6from typing import ( 

7 Any, 

8 Callable, 

9 Iterable, 

10 List, 

11 Mapping, 

12 Optional, 

13 Sequence, 

14 Tuple, 

15 Union, 

16) 

17 

18from pandas._config import get_option 

19 

20from pandas.core.dtypes.inference import is_sequence 

21 

# Type of the ``escape_chars`` argument of ``pprint_thing``: either a mapping
# of character -> replacement text, or a plain iterable of characters.
EscapeChars = Union[Mapping[str, str], Iterable[str]]

23 

24 

def adjoin(space: int, *lists: List[str], **kwargs) -> str:
    """
    Glue together columns of strings side by side, separated by padding.

    Each positional list is one column. Every column except the last is
    left-justified to its widest entry plus ``space``; columns with fewer
    rows are padded with blank rows so that all columns have the same height.
    The idea is to prettify.

    Parameters
    ----------
    space : int
        Number of spaces for padding between columns.
    *lists : list of str
        The columns being joined.
    strlen : callable, optional
        Function used to calculate the length of each str. Needed for unicode
        handling. Defaults to ``len``.
    justfunc : callable, optional
        Function used to justify str. Needed for unicode handling. It is
        called as ``justfunc(column, width, mode="left")``. Defaults to
        ``justify``.

    Returns
    -------
    str
        The joined rows separated by newlines; an empty string when no
        columns are given.
    """
    strlen = kwargs.pop("strlen", len)
    justfunc = kwargs.pop("justfunc", None)
    if justfunc is None:
        # resolve the default lazily so an explicit ``justfunc`` never
        # needs the module-level fallback
        justfunc = justify

    if not lists:
        # no columns at all: there is no "last column" to measure
        return ""

    # ``default=0`` keeps an empty column from raising inside ``max``
    lengths = [max(map(strlen, x), default=0) + space for x in lists[:-1]]
    # not the last one: it gets no trailing ``space`` and is measured with len
    lengths.append(max(map(len, lists[-1]), default=0))
    max_rows = max(map(len, lists))

    columns = []
    for width, lst in zip(lengths, lists):
        # copy so that whatever ``justfunc`` returned is never mutated
        column = list(justfunc(lst, width, mode="left"))
        # pad short columns with blank rows so zip() below keeps every row
        column.extend([" " * width] * (max_rows - len(lst)))
        columns.append(column)

    return "\n".join("".join(row) for row in zip(*columns))

58 

59 

def justify(texts: Iterable[str], max_len: int, mode: str = "right") -> List[str]:
    """
    Pad every string in ``texts`` to a width of ``max_len``.

    Parameters
    ----------
    texts : iterable of str
        Strings to pad.
    max_len : int
        Target width handed to the str padding method.
    mode : str, default "right"
        ``"left"`` uses ``ljust``, ``"center"`` uses ``center``; any other
        value uses ``rjust``.

    Returns
    -------
    list of str
    """
    if mode == "left":
        method_name = "ljust"
    elif mode == "center":
        method_name = "center"
    else:
        method_name = "rjust"
    return [getattr(text, method_name)(max_len) for text in texts]

70 

71 

72# Unicode consolidation 

73# --------------------- 

74# 

75# pprinting utility functions for generating Unicode text or 

76# bytes(3.x)/str(2.x) representations of objects. 

# Try to use these as much as possible rather than rolling your own.

78# 

79# When to use 

80# ----------- 

81# 

82# 1) If you're writing code internal to pandas (no I/O directly involved), 

83# use pprint_thing(). 

84# 

# It will always return unicode text which can be handled by other

86# parts of the package without breakage. 

87# 

88# 2) if you need to write something out to file, use 

89# pprint_thing_encoded(encoding). 

90# 

91# If no encoding is specified, it defaults to utf-8. Since encoding pure 

92# ascii with utf-8 is a no-op you can safely use the default utf-8 if you're 

93# working with straight ascii. 

94 

95 

def _pprint_seq(
    seq: Sequence, _nest_lvl: int = 0, max_seq_items: Optional[int] = None, **kwds
) -> str:
    """
    internal. pprinter for iterables. you should probably use pprint_thing()
    rather than calling this directly.

    Sets are wrapped in braces, objects with ``__setitem__`` in square
    brackets and everything else in parentheses. The number of printed items
    is bounded by ``max_seq_items`` or, failing that, the ``max_seq_items``
    option; passing ``max_seq_items=False`` disables the bound.
    """
    if isinstance(seq, set):
        template = "{{{body}}}"
    elif hasattr(seq, "__setitem__"):
        template = "[{body}]"
    else:
        template = "({body})"

    total = len(seq)
    if max_seq_items is False:
        limit = total
    else:
        limit = max_seq_items or get_option("max_seq_items") or total

    # pair items with a bounded range instead of slicing: sets cannot be sliced
    rendered = [
        pprint_thing(item, _nest_lvl + 1, max_seq_items=max_seq_items, **kwds)
        for _, item in zip(range(min(limit, total)), seq)
    ]
    body = ", ".join(rendered)

    if limit < total:
        # some items were left out
        body += ", ..."
    elif total == 1 and isinstance(seq, tuple):
        # one-element tuples keep their trailing comma
        body += ","

    return template.format(body=body)

129 

130 

def _pprint_dict(
    seq: Mapping, _nest_lvl: int = 0, max_seq_items: Optional[int] = None, **kwds
) -> str:
    """
    internal. pprinter for mappings. you should probably use pprint_thing()
    rather than calling this directly.

    Renders ``{key: value, ...}`` with keys and values formatted by
    ``pprint_thing``. The number of printed pairs is bounded by
    ``max_seq_items`` or, failing that, the ``max_seq_items`` option; passing
    ``max_seq_items=False`` disables the bound.
    """
    total = len(seq)
    if max_seq_items is False:
        limit = total
    else:
        limit = max_seq_items or get_option("max_seq_items") or total

    rendered = []
    for key, value in list(seq.items())[:limit]:
        key_text = pprint_thing(
            key, _nest_lvl + 1, max_seq_items=max_seq_items, **kwds
        )
        value_text = pprint_thing(
            value, _nest_lvl + 1, max_seq_items=max_seq_items, **kwds
        )
        rendered.append(f"{key_text}: {value_text}")

    things = ", ".join(rendered)
    if limit < total:
        # some pairs were left out
        things += ", ..."
    return f"{{{things}}}"

160 

161 

def pprint_thing(
    thing: Any,
    _nest_lvl: int = 0,
    escape_chars: Optional[EscapeChars] = None,
    default_escapes: bool = False,
    quote_strings: bool = False,
    max_seq_items: Optional[int] = None,
) -> str:
    """
    This function is the sanctioned way of converting objects
    to a string representation and properly handles nested sequences.

    Parameters
    ----------
    thing : anything to be formatted
    _nest_lvl : internal use only. pprint_thing() is mutually-recursive
        with _pprint_seq and _pprint_dict, this argument is used to keep
        track of the current nesting level, and limit it.
    escape_chars : list or dict, optional
        Characters to escape. If a dict is passed the values are the
        replacements
    default_escapes : bool, default False
        Whether the input escape characters replaces or adds to the defaults
    quote_strings : bool, default False
        Whether a str ``thing`` is wrapped in single quotes
    max_seq_items : int or None, default None
        Pass through to other pretty printers to limit sequence printing

    Returns
    -------
    str
    """

    def _escaped(value: Any) -> str:
        # built-in replacements; used for non-dict ``escape_chars``
        replacements = {"\t": r"\t", "\n": r"\n", "\r": r"\r"}
        if isinstance(escape_chars, dict):
            if default_escapes:
                replacements.update(escape_chars)
            else:
                replacements = escape_chars
            to_escape = list(escape_chars.keys())
        else:
            to_escape = escape_chars or tuple()

        text = str(value)
        for char in to_escape:
            text = text.replace(char, replacements[char])
        return text

    if hasattr(thing, "__next__"):
        # iterators are not consumed, just stringified
        return str(thing)

    if isinstance(thing, dict) and _nest_lvl < get_option(
        "display.pprint_nest_depth"
    ):
        return _pprint_dict(
            thing, _nest_lvl, quote_strings=True, max_seq_items=max_seq_items
        )

    if is_sequence(thing) and _nest_lvl < get_option("display.pprint_nest_depth"):
        return _pprint_seq(
            thing,
            _nest_lvl,
            escape_chars=escape_chars,
            quote_strings=quote_strings,
            max_seq_items=max_seq_items,
        )

    text = _escaped(thing)
    if isinstance(thing, str) and quote_strings:
        return f"'{text}'"
    return text

233 

234 

def pprint_thing_encoded(
    object, encoding: str = "utf-8", errors: str = "replace"
) -> bytes:
    """
    Return the ``pprint_thing`` text of ``object`` encoded to bytes.

    Parameters
    ----------
    object : anything to be formatted
    encoding : str, default "utf-8"
        Passed to ``str.encode``.
    errors : str, default "replace"
        Error handler passed to ``str.encode``.

    Returns
    -------
    bytes
    """
    # text representation first, then encode it in one step
    return pprint_thing(object).encode(encoding, errors)

240 

241 

242def _enable_data_resource_formatter(enable: bool) -> None: 

243 if "IPython" not in sys.modules: 

244 # definitely not in IPython 

245 return 

246 from IPython import get_ipython 

247 

248 ip = get_ipython() 

249 if ip is None: 

250 # still not in IPython 

251 return 

252 

253 formatters = ip.display_formatter.formatters 

254 mimetype = "application/vnd.dataresource+json" 

255 

256 if enable: 

257 if mimetype not in formatters: 

258 # define tableschema formatter 

259 from IPython.core.formatters import BaseFormatter 

260 

261 class TableSchemaFormatter(BaseFormatter): 

262 print_method = "_repr_data_resource_" 

263 _return_type = (dict,) 

264 

265 # register it: 

266 formatters[mimetype] = TableSchemaFormatter() 

267 # enable it if it's been disabled: 

268 formatters[mimetype].enabled = True 

269 else: 

270 # unregister tableschema mime-type 

271 if mimetype in formatters: 

272 formatters[mimetype].enabled = False 

273 

274 

def default_pprint(x: Any, max_seq_items: Optional[int] = None) -> str:
    """
    Format ``x`` with ``pprint_thing`` using the default display settings.

    Tabs, carriage returns and newlines are escaped and strings are quoted.

    Parameters
    ----------
    x : anything to be formatted
    max_seq_items : int or None, default None
        Passed through to ``pprint_thing``.

    Returns
    -------
    str
    """
    return pprint_thing(
        x,
        escape_chars=("\t", "\r", "\n"),
        quote_strings=True,
        max_seq_items=max_seq_items,
    )

278 

279 

def format_object_summary(
    obj,
    formatter: Callable,
    is_justify: bool = True,
    name: Optional[str] = None,
    indent_for_name: bool = True,
    line_break_each_value: bool = False,
) -> str:
    """
    Return the formatted obj as a unicode string

    Parameters
    ----------
    obj : object
        must be iterable and support __getitem__
    formatter : callable
        string formatter for an element
    is_justify : boolean
        should justify the display
    name : name, optional
        defaults to the class name of the obj
    indent_for_name : bool, default True
        Whether subsequent lines should be indented to
        align with the name.
    line_break_each_value : bool, default False
        If True, inserts a line break for each value of ``obj``.
        If False, only break lines when a line of values gets wider
        than the display width.

        .. versionadded:: 0.25.0

    Returns
    -------
    summary string
    """
    from pandas.io.formats.console import get_console_size
    from pandas.io.formats.format import _get_adjustment

    display_width, _ = get_console_size()
    if display_width is None:
        display_width = get_option("display.width") or 80
    if name is None:
        name = type(obj).__name__

    if indent_for_name:
        name_len = len(name)
        space1 = f'\n{(" " * (name_len + 1))}'
        space2 = f'\n{(" " * (name_len + 2))}'
    else:
        space1 = "\n"
        space2 = "\n "  # space for the opening '['

    n = len(obj)
    if line_break_each_value:
        # If we want to vertically align on each value of obj, we need to
        # separate values by a line break and indent the values
        sep = ",\n " + " " * len(name)
    else:
        sep = ","
    max_seq_items = get_option("display.max_seq_items") or n

    # are we a truncated display
    is_truncated = n > max_seq_items

    # adj can optionally handle unicode eastern asian width
    adj = _get_adjustment()

    def _extend_line(
        s: str, line: str, value: str, display_width: int, next_line_prefix: str
    ) -> Tuple[str, str]:
        # Append ``value`` to the current ``line``; if that would reach the
        # display width, flush ``line`` into ``s`` and start a new line.
        if adj.len(line.rstrip()) + adj.len(value.rstrip()) >= display_width:
            s += line.rstrip()
            line = next_line_prefix
        line += value
        return s, line

    def best_len(values: List[str]) -> int:
        # display width of the widest value, 0 for an empty list
        if values:
            return max(adj.len(x) for x in values)
        else:
            return 0

    close = ", "

    if n == 0:
        summary = f"[]{close}"
    elif n == 1 and not line_break_each_value:
        first = formatter(obj[0])
        summary = f"[{first}]{close}"
    elif n == 2 and not line_break_each_value:
        first = formatter(obj[0])
        last = formatter(obj[-1])
        summary = f"[{first}, {last}]{close}"
    else:

        if max_seq_items == 1:
            # ``max_seq_items // 2`` would be 0 and ``obj[-0:]`` is the whole
            # object, so show only the last element
            head = []
            tail = [formatter(x) for x in obj[-1:]]
        elif n > max_seq_items:
            n = min(max_seq_items // 2, 10)
            head = [formatter(x) for x in obj[:n]]
            tail = [formatter(x) for x in obj[-n:]]
        else:
            head = []
            tail = [formatter(x) for x in obj]

        # adjust all values to max length if needed
        if is_justify:
            if line_break_each_value:
                # Justify each string in the values of head and tail, so the
                # strings will right align when head and tail are stacked
                # vertically.
                head, tail = _justify(head, tail)
            elif is_truncated or not (
                len(", ".join(head)) < display_width
                and len(", ".join(tail)) < display_width
            ):
                # Each string in head and tail should align with each other
                max_length = max(best_len(head), best_len(tail))
                head = [x.rjust(max_length) for x in head]
                tail = [x.rjust(max_length) for x in tail]
            # If we are not truncated and we are only a single
            # line, then don't justify

        if line_break_each_value:
            # Now head and tail are of type List[Tuple[str]]. Below we
            # convert them into List[str], so there will be one string per
            # value. Also truncate items horizontally if wider than
            # max_space
            max_space = display_width - len(space2)
            value = tail[0]
            for max_items in reversed(range(1, len(value) + 1)):
                pprinted_seq = _pprint_seq(value, max_seq_items=max_items)
                if len(pprinted_seq) < max_space:
                    break
            head = [_pprint_seq(x, max_seq_items=max_items) for x in head]
            tail = [_pprint_seq(x, max_seq_items=max_items) for x in tail]

        summary = ""
        line = space2

        for item in head:
            word = item + sep + " "
            summary, line = _extend_line(summary, line, word, display_width, space2)

        if is_truncated:
            # remove trailing space of last line
            summary += line.rstrip() + space2 + "..."
            line = space2

        # every tail value except the last one is followed by a separator
        for item in tail[:-1]:
            word = item + sep + " "
            summary, line = _extend_line(summary, line, word, display_width, space2)

        # last value: no sep added + 1 space of width used for trailing ','
        summary, line = _extend_line(summary, line, tail[-1], display_width - 2, space2)
        summary += line

        # right now close is either '' or ', '
        # Now we want to include the ']', but not the maybe space.
        close = "]" + close.rstrip(" ")
        summary += close

        if len(summary) > (display_width) or line_break_each_value:
            summary += space1
        else:  # one row
            summary += " "

        # remove initial space
        summary = "[" + summary[len(space2) :]

    return summary

450 

451 

452def _justify( 

453 head: List[Sequence[str]], tail: List[Sequence[str]] 

454) -> Tuple[List[Tuple[str, ...]], List[Tuple[str, ...]]]: 

455 """ 

456 Justify items in head and tail, so they are right-aligned when stacked. 

457 

458 Parameters 

459 ---------- 

460 head : list-like of list-likes of strings 

461 tail : list-like of list-likes of strings 

462 

463 Returns 

464 ------- 

465 tuple of list of tuples of strings 

466 Same as head and tail, but items are right aligned when stacked 

467 vertically. 

468 

469 Examples 

470 -------- 

471 >>> _justify([['a', 'b']], [['abc', 'abcd']]) 

472 ([(' a', ' b')], [('abc', 'abcd')]) 

473 """ 

474 combined = head + tail 

475 

476 # For each position for the sequences in ``combined``, 

477 # find the length of the largest string. 

478 max_length = [0] * len(combined[0]) 

479 for inner_seq in combined: 

480 length = [len(item) for item in inner_seq] 

481 max_length = [max(x, y) for x, y in zip(max_length, length)] 

482 

483 # justify each item in each list-like in head and tail using max_length 

484 head = [ 

485 tuple(x.rjust(max_len) for x, max_len in zip(seq, max_length)) for seq in head 

486 ] 

487 tail = [ 

488 tuple(x.rjust(max_len) for x, max_len in zip(seq, max_length)) for seq in tail 

489 ] 

490 # https://github.com/python/mypy/issues/4975 

491 # error: Incompatible return value type (got "Tuple[List[Sequence[str]], 

492 # List[Sequence[str]]]", expected "Tuple[List[Tuple[str, ...]], 

493 # List[Tuple[str, ...]]]") 

494 return head, tail # type: ignore 

495 

496 

def format_object_attrs(
    obj: Sequence, include_dtype: bool = True
) -> List[Tuple[str, Union[str, int]]]:
    """
    Collect (attr, formatted_value) pairs for the common attrs of ``obj``.

    The candidates are dtype, name (or names when name is unset) and length;
    length is only reported when ``obj`` is longer than the
    ``display.max_seq_items`` option.

    Parameters
    ----------
    obj : object
        must be iterable
    include_dtype : bool
        If False, dtype won't be in the returned list

    Returns
    -------
    list of 2-tuple
    """
    attrs: List[Tuple[str, Union[str, int]]] = []

    if hasattr(obj, "dtype") and include_dtype:
        # error: "Sequence[Any]" has no attribute "dtype"
        attrs.append(("dtype", f"'{obj.dtype}'"))  # type: ignore

    name = getattr(obj, "name", None)
    if name is not None:
        attrs.append(("name", default_pprint(name)))
    else:
        # ``names`` is only consulted when there is no single ``name``
        names = getattr(obj, "names", None)
        if names is not None and any(names):
            attrs.append(("names", default_pprint(names)))

    size = len(obj)
    limit = get_option("display.max_seq_items") or size
    if size > limit:
        attrs.append(("length", size))
    return attrs