Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1""" 

2Internal module for formatting output data in csv, html, 

3and latex files. This module also applies to display formatting. 

4""" 

5 

6from contextlib import contextmanager 

7from datetime import tzinfo 

8import decimal 

9from functools import partial 

10from io import StringIO 

11import math 

12import re 

13from shutil import get_terminal_size 

14from typing import ( 

15 IO, 

16 TYPE_CHECKING, 

17 Any, 

18 Callable, 

19 Dict, 

20 Iterable, 

21 List, 

22 Mapping, 

23 Optional, 

24 Sequence, 

25 Tuple, 

26 Type, 

27 Union, 

28 cast, 

29) 

30from unicodedata import east_asian_width 

31 

32import numpy as np 

33 

34from pandas._config.config import get_option, set_option 

35 

36from pandas._libs import lib 

37from pandas._libs.missing import NA 

38from pandas._libs.tslib import format_array_from_datetime 

39from pandas._libs.tslibs import NaT, Timedelta, Timestamp, iNaT 

40from pandas._libs.tslibs.nattype import NaTType 

41from pandas._typing import FilePathOrBuffer 

42from pandas.errors import AbstractMethodError 

43 

44from pandas.core.dtypes.common import ( 

45 is_categorical_dtype, 

46 is_complex_dtype, 

47 is_datetime64_dtype, 

48 is_datetime64tz_dtype, 

49 is_extension_array_dtype, 

50 is_float, 

51 is_float_dtype, 

52 is_integer, 

53 is_integer_dtype, 

54 is_list_like, 

55 is_numeric_dtype, 

56 is_scalar, 

57 is_timedelta64_dtype, 

58) 

59from pandas.core.dtypes.generic import ( 

60 ABCIndexClass, 

61 ABCMultiIndex, 

62 ABCSeries, 

63 ABCSparseArray, 

64) 

65from pandas.core.dtypes.missing import isna, notna 

66 

67from pandas.core.arrays.datetimes import DatetimeArray 

68from pandas.core.arrays.timedeltas import TimedeltaArray 

69from pandas.core.base import PandasObject 

70import pandas.core.common as com 

71from pandas.core.indexes.api import Index, ensure_index 

72from pandas.core.indexes.datetimes import DatetimeIndex 

73from pandas.core.indexes.timedeltas import TimedeltaIndex 

74 

75from pandas.io.common import stringify_path 

76from pandas.io.formats.printing import adjoin, justify, pprint_thing 

77 

78if TYPE_CHECKING: 

79 from pandas import Series, DataFrame, Categorical 

80 

81formatters_type = Union[ 

82 List[Callable], Tuple[Callable, ...], Mapping[Union[str, int], Callable] 

83] 

84float_format_type = Union[str, Callable, "EngFormatter"] 

85 

86common_docstring = """ 

87 Parameters 

88 ---------- 

89 buf : str, Path or StringIO-like, optional, default None 

90 Buffer to write to. If None, the output is returned as a string. 

91 columns : sequence, optional, default None 

92 The subset of columns to write. Writes all columns by default. 

93 col_space : %(col_space_type)s, optional 

94 %(col_space)s. 

95 header : %(header_type)s, optional 

96 %(header)s. 

97 index : bool, optional, default True 

98 Whether to print index (row) labels. 

99 na_rep : str, optional, default 'NaN' 

100 String representation of NAN to use. 

101 formatters : list, tuple or dict of one-param. functions, optional 

102 Formatter functions to apply to columns' elements by position or 

103 name. 

104 The result of each function must be a unicode string. 

105 List/tuple must be of length equal to the number of columns. 

106 float_format : one-parameter function, optional, default None 

107 Formatter function to apply to columns' elements if they are 

108 floats. The result of this function must be a unicode string. 

109 sparsify : bool, optional, default True 

110 Set to False for a DataFrame with a hierarchical index to print 

111 every multiindex key at each row. 

112 index_names : bool, optional, default True 

113 Prints the names of the indexes. 

114 justify : str, default None 

115 How to justify the column labels. If None uses the option from 

116 the print configuration (controlled by set_option), 'right' out 

117 of the box. Valid values are 

118 

119 * left 

120 * right 

121 * center 

122 * justify 

123 * justify-all 

124 * start 

125 * end 

126 * inherit 

127 * match-parent 

128 * initial 

129 * unset. 

130 max_rows : int, optional 

131 Maximum number of rows to display in the console. 

132 min_rows : int, optional 

133 The number of rows to display in the console in a truncated repr 

134 (when number of rows is above `max_rows`). 

135 max_cols : int, optional 

136 Maximum number of columns to display in the console. 

137 show_dimensions : bool, default False 

138 Display DataFrame dimensions (number of rows by number of columns). 

139 decimal : str, default '.' 

140 Character recognized as decimal separator, e.g. ',' in Europe. 

141 """ 

142 

143_VALID_JUSTIFY_PARAMETERS = ( 

144 "left", 

145 "right", 

146 "center", 

147 "justify", 

148 "justify-all", 

149 "start", 

150 "end", 

151 "inherit", 

152 "match-parent", 

153 "initial", 

154 "unset", 

155) 

156 

157return_docstring = """ 

158 Returns 

159 ------- 

160 str or None 

161 If buf is None, returns the result as a string. Otherwise returns 

162 None. 

163 """ 

164 

165 

class CategoricalFormatter:
    """
    Build the bracketed, comma-separated text form of a Categorical.

    Parameters
    ----------
    categorical : Categorical
        Values to render.
    buf : IO[str], optional
        Kept on the instance; a fresh ``StringIO`` is created when omitted.
    length : bool, default True
        Whether the footer starts with a ``Length: N`` entry.
    na_rep : str, default "NaN"
        Forwarded to ``format_array`` for missing values.
    footer : bool, default True
        Whether ``to_string`` appends the footer.
    """

    def __init__(
        self,
        categorical: "Categorical",
        buf: Optional[IO[str]] = None,
        length: bool = True,
        na_rep: str = "NaN",
        footer: bool = True,
    ):
        self.categorical = categorical
        self.buf = StringIO("") if buf is None else buf
        self.na_rep = na_rep
        self.length = length
        self.footer = footer

    def _get_footer(self) -> str:
        """Return the footer: optional length entry, then the category info."""
        pieces = []
        if self.length:
            pieces.append("Length: {length}".format(length=len(self.categorical)))

        # Category info always goes last, on its own line when a length
        # entry precedes it.
        pieces.append(self.categorical._repr_categories_info())
        return str("\n".join(pieces))

    def _get_formatted_values(self) -> List[str]:
        """Format the underlying values with ``format_array``."""
        raw_values = self.categorical._internal_get_values()
        return format_array(raw_values, None, float_format=None, na_rep=self.na_rep)

    def to_string(self) -> str:
        """Return ``[v1, v2, ...]`` followed by the footer when enabled."""
        if len(self.categorical) == 0:
            # Nothing to list: only the footer (if requested) is shown.
            return self._get_footer() if self.footer else ""

        stripped = [format(value, "").strip() for value in self._get_formatted_values()]
        lines = ["[" + ", ".join(stripped) + "]"]

        if self.footer:
            footer_text = self._get_footer()
            if footer_text:
                lines.append(footer_text)

        return str("\n".join(lines))

227 

228 

class SeriesFormatter:
    """
    Render a Series as aligned text with an optional footer.

    Parameters
    ----------
    series : Series
        Data to render.
    buf : IO[str], optional
        Kept on the instance; a fresh ``StringIO`` is created when omitted.
    length : bool or str, default True
        ``True`` always adds a ``Length`` footer entry; ``"truncate"`` adds it
        only when rows were truncated.
    header : bool, default True
        Whether the index name row is emitted when one exists.
    index : bool, default True
        Whether the index column is emitted.
    na_rep : str, default "NaN"
        Forwarded to ``format_array`` for missing values.
    name : bool, default False
        Whether the footer includes the series name.
    float_format : str, optional
        Falls back to the ``display.float_format`` option when ``None``.
    dtype : bool, default True
        Whether the footer includes the dtype name.
    max_rows, min_rows : int, optional
        Row limits used by ``_chk_truncate``.
    """

    def __init__(
        self,
        series: "Series",
        buf: Optional[IO[str]] = None,
        length: Union[bool, str] = True,
        header: bool = True,
        index: bool = True,
        na_rep: str = "NaN",
        name: bool = False,
        float_format: Optional[str] = None,
        dtype: bool = True,
        max_rows: Optional[int] = None,
        min_rows: Optional[int] = None,
    ):
        self.series = series
        self.buf = StringIO() if buf is None else buf
        self.name = name
        self.na_rep = na_rep
        self.header = header
        self.length = length
        self.index = index
        self.max_rows = max_rows
        self.min_rows = min_rows

        self.float_format = (
            get_option("display.float_format") if float_format is None else float_format
        )
        self.dtype = dtype
        self.adj = _get_adjustment()

        self._chk_truncate()

    def _chk_truncate(self) -> None:
        """
        Decide whether rows must be cut and store the result.

        Sets ``tr_series`` (rows actually rendered), ``tr_row_num`` (number of
        leading rows kept, or ``None``) and ``truncate_v``.
        """
        from pandas.core.reshape.concat import concat

        self.tr_row_num: Optional[int]

        row_limit = self.max_rows
        shown = self.series
        # max_rows alone decides whether to truncate; min_rows only affects
        # how many rows survive once truncation happens.
        needs_cut = row_limit and (len(shown) > row_limit)

        if not needs_cut:
            self.tr_row_num = None
        else:
            row_limit = cast(int, row_limit)
            if self.min_rows:
                # min_rows set (neither None nor 0): show the smaller of both
                row_limit = min(self.min_rows, row_limit)
            if row_limit == 1:
                kept = row_limit
                shown = shown.iloc[:row_limit]
            else:
                kept = row_limit // 2
                shown = concat((shown.iloc[:kept], shown.iloc[-kept:]))
            self.tr_row_num = kept

        self.tr_series = shown
        self.truncate_v = needs_cut

    def _get_footer(self) -> str:
        """Assemble the ``Freq, Name, Length, dtype`` footer line(s)."""
        series_name = self.series.name
        entries = []

        if getattr(self.series.index, "freq", None) is not None:
            entries.append("Freq: {freq}".format(freq=self.series.index.freqstr))

        if self.name is not False and series_name is not None:
            shown_name = pprint_thing(series_name, escape_chars=("\t", "\r", "\n"))
            entries.append("Name: {sname}".format(sname=shown_name))

        if self.length is True or (self.length == "truncate" and self.truncate_v):
            entries.append("Length: {length}".format(length=len(self.series)))

        if self.dtype is not False and self.dtype is not None:
            dtype_name = getattr(self.tr_series.dtype, "name", None)
            if dtype_name:
                entries.append("dtype: {typ}".format(typ=pprint_thing(dtype_name)))

        footer = ", ".join(entries)

        # Category info goes last on its own line, as for Categoricals.
        if is_categorical_dtype(self.tr_series.dtype):
            category_info = self.tr_series._values._repr_categories_info()
            if footer:
                footer += "\n"
            footer += category_info

        return str(footer)

    def _get_formatted_index(self) -> Tuple[List[str], bool]:
        """Return the formatted index (names included) and whether it is named."""
        index = self.tr_series.index

        if isinstance(index, ABCMultiIndex):
            have_header = any(index.names)
            fmt_index = index.format(names=True)
        else:
            have_header = index.name is not None
            fmt_index = index.format(name=True)
        return fmt_index, have_header

    def _get_formatted_values(self) -> List[str]:
        """Format the rendered values with ``format_array``."""
        values = self.tr_series._values
        return format_array(
            values, None, float_format=self.float_format, na_rep=self.na_rep
        )

    def to_string(self) -> str:
        """Return the complete text representation of the series."""
        footer = self._get_footer()

        if len(self.tr_series) == 0:
            return "{name}([], {footer})".format(
                name=type(self.series).__name__, footer=footer
            )

        fmt_index, have_header = self._get_formatted_index()
        fmt_values = self._get_formatted_values()

        if self.truncate_v:
            cut_at = cast(int, self.tr_row_num)
            # Size the marker from the last row shown before the cut.
            width = self.adj.len(fmt_values[cut_at - 1])
            marker = "..." if width > 3 else ".."
            # Series uses mode=center because it has single value columns
            # DataFrame uses mode=left
            marker = self.adj.justify([marker], width, mode="center")[0]
            fmt_values.insert(cut_at, marker)
            # fmt_index still carries the name row at position 0, hence +1.
            fmt_index.insert(cut_at + 1, "")

        parts = [fmt_index[1:], fmt_values] if self.index else [fmt_values]
        result = self.adj.adjoin(3, *parts)

        if self.header and have_header:
            result = fmt_index[0] + "\n" + result

        if footer:
            result += "\n" + footer

        return str(result)

388 

389 

class TextAdjustment:
    """Measure, pad and join text columns using plain character counts."""

    def __init__(self):
        self.encoding = get_option("display.encoding")

    def len(self, text: str) -> int:
        """Return the number of characters in ``text``."""
        n_chars = len(text)
        return n_chars

    def justify(self, texts: Any, max_len: int, mode: str = "right") -> List[str]:
        """Pad every entry of ``texts`` to ``max_len`` using ``mode``."""
        padded = justify(texts, max_len, mode=mode)
        return padded

    def adjoin(self, space: int, *lists, **kwargs) -> str:
        """Join ``lists`` side by side, measuring and padding with this object."""
        measure, pad = self.len, self.justify
        return adjoin(space, *lists, strlen=measure, justfunc=pad, **kwargs)

402 

403 

class EastAsianTextAdjustment(TextAdjustment):
    """Text adjustment that measures strings by East Asian display width."""

    def __init__(self):
        super().__init__()
        # Width given to characters whose East Asian Width class is not in
        # the table below (i.e. the "ambiguous" class), set by option.
        self.ambiguous_width = (
            2 if get_option("display.unicode.ambiguous_as_wide") else 1
        )

        # Definition of East Asian Width
        # http://unicode.org/reports/tr11/
        # Ambiguous width can be changed by option
        self._EAW_MAP = {"Na": 1, "N": 1, "W": 2, "F": 2, "H": 1}

    def len(self, text: str) -> int:
        """
        Calculate display width considering unicode East Asian Width
        """
        if not isinstance(text, str):
            return len(text)

        width_of = self._EAW_MAP
        fallback = self.ambiguous_width
        total = 0
        for char in text:
            total += width_of.get(east_asian_width(char), fallback)
        return total

    def justify(
        self, texts: Iterable[str], max_len: int, mode: str = "right"
    ) -> List[str]:
        """Pad each text so its display width (not character count) is ``max_len``."""
        if mode == "left":
            pad_method = "ljust"
        elif mode == "center":
            pad_method = "center"
        else:
            pad_method = "rjust"

        padded = []
        for text in texts:
            # str padding counts characters, so convert the display-width
            # target into a character-count target for this string.
            target = max_len - self.len(text) + len(text)
            padded.append(getattr(text, pad_method)(target))
        return padded

441 

442 

def _get_adjustment() -> TextAdjustment:
    """Return the text adjustment selected by ``display.unicode.east_asian_width``."""
    adjustment_cls = (
        EastAsianTextAdjustment
        if get_option("display.unicode.east_asian_width")
        else TextAdjustment
    )
    return adjustment_cls()

449 

450 

class TableFormatter:
    """Shared buffer handling and formatter lookup for table renderers."""

    show_dimensions: Union[bool, str]
    is_truncated: bool
    formatters: formatters_type
    columns: Index

    @property
    def should_show_dimensions(self) -> bool:
        """True when dimensions are always shown, or shown because of truncation."""
        if self.show_dimensions is True:
            return True
        return self.show_dimensions == "truncate" and self.is_truncated

    def _get_formatter(self, i: Union[str, int]) -> Optional[Callable]:
        """
        Look up the formatter for column ``i``.

        Sequence formatters are addressed by integer position only; mapping
        formatters are addressed by label, with an integer that is not itself
        a column label translated to the label at that position.
        """
        if isinstance(self.formatters, (list, tuple)):
            if not is_integer(i):
                return None
            return self.formatters[cast(int, i)]

        if is_integer(i) and i not in self.columns:
            i = self.columns[i]
        return self.formatters.get(i, None)

    @contextmanager
    def get_buffer(
        self, buf: Optional[FilePathOrBuffer[str]], encoding: Optional[str] = None
    ):
        """
        Context manager to open, yield and close buffer for filenames or Path-like
        objects, otherwise yield buf unchanged.
        """
        target = StringIO() if buf is None else stringify_path(buf)
        is_filename = isinstance(target, str)

        if encoding is None:
            encoding = "utf-8"
        elif not is_filename:
            # An explicit encoding only makes sense when we open the file.
            raise ValueError("buf is not a file name and encoding is specified.")

        if hasattr(target, "write"):
            yield target
        elif is_filename:
            with open(target, "w", encoding=encoding, newline="") as handle:
                # GH#30034 open instead of codecs.open prevents a file leak
                # if we have an invalid encoding argument.
                # newline="" is needed to roundtrip correctly on
                # windows test_to_latex_filename
                yield handle
        else:
            raise TypeError("buf is not a file name and it has no write method")

    def write_result(self, buf: IO[str]) -> None:
        """
        Write the result of serialization to buf.
        """
        raise AbstractMethodError(self)

    def get_result(
        self,
        buf: Optional[FilePathOrBuffer[str]] = None,
        encoding: Optional[str] = None,
    ) -> Optional[str]:
        """
        Perform serialization. Write to buf or return as string if buf is None.
        """
        with self.get_buffer(buf, encoding=encoding) as handle:
            self.write_result(buf=handle)
            # Only the internally created StringIO is read back.
            return handle.getvalue() if buf is None else None

525 

526 

527class DataFrameFormatter(TableFormatter): 

528 """ 

529 Render a DataFrame 

530 

531 self.to_string() : console-friendly tabular output 

532 self.to_html() : html table 

533 self.to_latex() : LaTeX tabular environment table 

534 

535 """ 

536 

537 __doc__ = __doc__ if __doc__ else "" 

538 __doc__ += common_docstring + return_docstring 

539 

540 def __init__( 

541 self, 

542 frame: "DataFrame", 

543 columns: Optional[Sequence[str]] = None, 

544 col_space: Optional[Union[str, int]] = None, 

545 header: Union[bool, Sequence[str]] = True, 

546 index: bool = True, 

547 na_rep: str = "NaN", 

548 formatters: Optional[formatters_type] = None, 

549 justify: Optional[str] = None, 

550 float_format: Optional[float_format_type] = None, 

551 sparsify: Optional[bool] = None, 

552 index_names: bool = True, 

553 line_width: Optional[int] = None, 

554 max_rows: Optional[int] = None, 

555 min_rows: Optional[int] = None, 

556 max_cols: Optional[int] = None, 

557 show_dimensions: Union[bool, str] = False, 

558 decimal: str = ".", 

559 table_id: Optional[str] = None, 

560 render_links: bool = False, 

561 bold_rows: bool = False, 

562 escape: bool = True, 

563 ): 

564 self.frame = frame 

565 self.show_index_names = index_names 

566 

567 if sparsify is None: 

568 sparsify = get_option("display.multi_sparse") 

569 

570 self.sparsify = sparsify 

571 

572 self.float_format = float_format 

573 if formatters is None: 

574 self.formatters = {} 

575 elif len(frame.columns) == len(formatters) or isinstance(formatters, dict): 

576 self.formatters = formatters 

577 else: 

578 raise ValueError( 

579 ( 

580 "Formatters length({flen}) should match " 

581 "DataFrame number of columns({dlen})" 

582 ).format(flen=len(formatters), dlen=len(frame.columns)) 

583 ) 

584 self.na_rep = na_rep 

585 self.decimal = decimal 

586 self.col_space = col_space 

587 self.header = header 

588 self.index = index 

589 self.line_width = line_width 

590 self.max_rows = max_rows 

591 self.min_rows = min_rows 

592 self.max_cols = max_cols 

593 self.max_rows_displayed = min(max_rows or len(self.frame), len(self.frame)) 

594 self.show_dimensions = show_dimensions 

595 self.table_id = table_id 

596 self.render_links = render_links 

597 

598 if justify is None: 

599 self.justify = get_option("display.colheader_justify") 

600 else: 

601 self.justify = justify 

602 

603 self.bold_rows = bold_rows 

604 self.escape = escape 

605 

606 if columns is not None: 

607 self.columns = ensure_index(columns) 

608 self.frame = self.frame[self.columns] 

609 else: 

610 self.columns = frame.columns 

611 

612 self._chk_truncate() 

613 self.adj = _get_adjustment() 

614 

615 def _chk_truncate(self) -> None: 

616 """ 

617 Checks whether the frame should be truncated. If so, slices 

618 the frame up. 

619 """ 

620 from pandas.core.reshape.concat import concat 

621 

622 # Cut the data to the information actually printed 

623 max_cols = self.max_cols 

624 max_rows = self.max_rows 

625 self.max_rows_adj: Optional[int] 

626 max_rows_adj: Optional[int] 

627 

628 if max_cols == 0 or max_rows == 0: # assume we are in the terminal 

629 (w, h) = get_terminal_size() 

630 self.w = w 

631 self.h = h 

632 if self.max_rows == 0: 

633 dot_row = 1 

634 prompt_row = 1 

635 if self.show_dimensions: 

636 show_dimension_rows = 3 

637 # assume we only get here if self.header is boolean. 

638 # i.e. not to_latex() where self.header may be List[str] 

639 self.header = cast(bool, self.header) 

640 n_add_rows = self.header + dot_row + show_dimension_rows + prompt_row 

641 # rows available to fill with actual data 

642 max_rows_adj = self.h - n_add_rows 

643 self.max_rows_adj = max_rows_adj 

644 

645 # Format only rows and columns that could potentially fit the 

646 # screen 

647 if max_cols == 0 and len(self.frame.columns) > w: 

648 max_cols = w 

649 if max_rows == 0 and len(self.frame) > h: 

650 max_rows = h 

651 

652 if not hasattr(self, "max_rows_adj"): 

653 if max_rows: 

654 if (len(self.frame) > max_rows) and self.min_rows: 

655 # if truncated, set max_rows showed to min_rows 

656 max_rows = min(self.min_rows, max_rows) 

657 self.max_rows_adj = max_rows 

658 if not hasattr(self, "max_cols_adj"): 

659 self.max_cols_adj = max_cols 

660 

661 max_cols_adj = self.max_cols_adj 

662 max_rows_adj = self.max_rows_adj 

663 

664 truncate_h = max_cols_adj and (len(self.columns) > max_cols_adj) 

665 truncate_v = max_rows_adj and (len(self.frame) > max_rows_adj) 

666 

667 frame = self.frame 

668 if truncate_h: 

669 # cast here since if truncate_h is True, max_cols_adj is not None 

670 max_cols_adj = cast(int, max_cols_adj) 

671 if max_cols_adj == 0: 

672 col_num = len(frame.columns) 

673 elif max_cols_adj == 1: 

674 max_cols = cast(int, max_cols) 

675 frame = frame.iloc[:, :max_cols] 

676 col_num = max_cols 

677 else: 

678 col_num = max_cols_adj // 2 

679 frame = concat( 

680 (frame.iloc[:, :col_num], frame.iloc[:, -col_num:]), axis=1 

681 ) 

682 # truncate formatter 

683 if isinstance(self.formatters, (list, tuple)): 

684 truncate_fmt = self.formatters 

685 self.formatters = [ 

686 *truncate_fmt[:col_num], 

687 *truncate_fmt[-col_num:], 

688 ] 

689 self.tr_col_num = col_num 

690 if truncate_v: 

691 # cast here since if truncate_v is True, max_rows_adj is not None 

692 max_rows_adj = cast(int, max_rows_adj) 

693 if max_rows_adj == 1: 

694 row_num = max_rows 

695 frame = frame.iloc[:max_rows, :] 

696 else: 

697 row_num = max_rows_adj // 2 

698 frame = concat((frame.iloc[:row_num, :], frame.iloc[-row_num:, :])) 

699 self.tr_row_num = row_num 

700 else: 

701 self.tr_row_num = None 

702 

703 self.tr_frame = frame 

704 self.truncate_h = truncate_h 

705 self.truncate_v = truncate_v 

706 self.is_truncated = bool(self.truncate_h or self.truncate_v) 

707 

708 def _to_str_columns(self) -> List[List[str]]: 

709 """ 

710 Render a DataFrame to a list of columns (as lists of strings). 

711 """ 

712 # this method is not used by to_html where self.col_space 

713 # could be a string so safe to cast 

714 self.col_space = cast(int, self.col_space) 

715 

716 frame = self.tr_frame 

717 # may include levels names also 

718 

719 str_index = self._get_formatted_index(frame) 

720 

721 if not is_list_like(self.header) and not self.header: 

722 stringified = [] 

723 for i, c in enumerate(frame): 

724 fmt_values = self._format_col(i) 

725 fmt_values = _make_fixed_width( 

726 fmt_values, 

727 self.justify, 

728 minimum=(self.col_space or 0), 

729 adj=self.adj, 

730 ) 

731 stringified.append(fmt_values) 

732 else: 

733 if is_list_like(self.header): 

734 # cast here since can't be bool if is_list_like 

735 self.header = cast(List[str], self.header) 

736 if len(self.header) != len(self.columns): 

737 raise ValueError( 

738 ( 

739 "Writing {ncols} cols but got {nalias} " 

740 "aliases".format( 

741 ncols=len(self.columns), nalias=len(self.header) 

742 ) 

743 ) 

744 ) 

745 str_columns = [[label] for label in self.header] 

746 else: 

747 str_columns = self._get_formatted_column_labels(frame) 

748 

749 if self.show_row_idx_names: 

750 for x in str_columns: 

751 x.append("") 

752 

753 stringified = [] 

754 for i, c in enumerate(frame): 

755 cheader = str_columns[i] 

756 header_colwidth = max( 

757 self.col_space or 0, *(self.adj.len(x) for x in cheader) 

758 ) 

759 fmt_values = self._format_col(i) 

760 fmt_values = _make_fixed_width( 

761 fmt_values, self.justify, minimum=header_colwidth, adj=self.adj 

762 ) 

763 

764 max_len = max(max(self.adj.len(x) for x in fmt_values), header_colwidth) 

765 cheader = self.adj.justify(cheader, max_len, mode=self.justify) 

766 stringified.append(cheader + fmt_values) 

767 

768 strcols = stringified 

769 if self.index: 

770 strcols.insert(0, str_index) 

771 

772 # Add ... to signal truncated 

773 truncate_h = self.truncate_h 

774 truncate_v = self.truncate_v 

775 

776 if truncate_h: 

777 col_num = self.tr_col_num 

778 strcols.insert(self.tr_col_num + 1, [" ..."] * (len(str_index))) 

779 if truncate_v: 

780 n_header_rows = len(str_index) - len(frame) 

781 row_num = self.tr_row_num 

782 # cast here since if truncate_v is True, self.tr_row_num is not None 

783 row_num = cast(int, row_num) 

784 for ix, col in enumerate(strcols): 

785 # infer from above row 

786 cwidth = self.adj.len(strcols[ix][row_num]) 

787 is_dot_col = False 

788 if truncate_h: 

789 is_dot_col = ix == col_num + 1 

790 if cwidth > 3 or is_dot_col: 

791 my_str = "..." 

792 else: 

793 my_str = ".." 

794 

795 if ix == 0: 

796 dot_mode = "left" 

797 elif is_dot_col: 

798 cwidth = 4 

799 dot_mode = "right" 

800 else: 

801 dot_mode = "right" 

802 dot_str = self.adj.justify([my_str], cwidth, mode=dot_mode)[0] 

803 strcols[ix].insert(row_num + n_header_rows, dot_str) 

804 return strcols 

805 

806 def write_result(self, buf: IO[str]) -> None: 

807 """ 

808 Render a DataFrame to a console-friendly tabular output. 

809 """ 

810 from pandas import Series 

811 

812 frame = self.frame 

813 

814 if len(frame.columns) == 0 or len(frame.index) == 0: 

815 info_line = "Empty {name}\nColumns: {col}\nIndex: {idx}".format( 

816 name=type(self.frame).__name__, 

817 col=pprint_thing(frame.columns), 

818 idx=pprint_thing(frame.index), 

819 ) 

820 text = info_line 

821 else: 

822 

823 strcols = self._to_str_columns() 

824 if self.line_width is None: # no need to wrap around just print 

825 # the whole frame 

826 text = self.adj.adjoin(1, *strcols) 

827 elif ( 

828 not isinstance(self.max_cols, int) or self.max_cols > 0 

829 ): # need to wrap around 

830 text = self._join_multiline(*strcols) 

831 else: # max_cols == 0. Try to fit frame to terminal 

832 lines = self.adj.adjoin(1, *strcols).split("\n") 

833 max_len = Series(lines).str.len().max() 

834 # plus truncate dot col 

835 dif = max_len - self.w 

836 # '+ 1' to avoid too wide repr (GH PR #17023) 

837 adj_dif = dif + 1 

838 col_lens = Series([Series(ele).apply(len).max() for ele in strcols]) 

839 n_cols = len(col_lens) 

840 counter = 0 

841 while adj_dif > 0 and n_cols > 1: 

842 counter += 1 

843 mid = int(round(n_cols / 2.0)) 

844 mid_ix = col_lens.index[mid] 

845 col_len = col_lens[mid_ix] 

846 # adjoin adds one 

847 adj_dif -= col_len + 1 

848 col_lens = col_lens.drop(mid_ix) 

849 n_cols = len(col_lens) 

850 # subtract index column 

851 max_cols_adj = n_cols - self.index 

852 # GH-21180. Ensure that we print at least two. 

853 max_cols_adj = max(max_cols_adj, 2) 

854 self.max_cols_adj = max_cols_adj 

855 

856 # Call again _chk_truncate to cut frame appropriately 

857 # and then generate string representation 

858 self._chk_truncate() 

859 strcols = self._to_str_columns() 

860 text = self.adj.adjoin(1, *strcols) 

861 buf.writelines(text) 

862 

863 if self.should_show_dimensions: 

864 buf.write( 

865 "\n\n[{nrows} rows x {ncols} columns]".format( 

866 nrows=len(frame), ncols=len(frame.columns) 

867 ) 

868 ) 

869 

870 def _join_multiline(self, *args) -> str: 

871 lwidth = self.line_width 

872 adjoin_width = 1 

873 strcols = list(args) 

874 if self.index: 

875 idx = strcols.pop(0) 

876 lwidth -= np.array([self.adj.len(x) for x in idx]).max() + adjoin_width 

877 

878 col_widths = [ 

879 np.array([self.adj.len(x) for x in col]).max() if len(col) > 0 else 0 

880 for col in strcols 

881 ] 

882 

883 assert lwidth is not None 

884 col_bins = _binify(col_widths, lwidth) 

885 nbins = len(col_bins) 

886 

887 if self.truncate_v: 

888 # cast here since if truncate_v is True, max_rows_adj is not None 

889 self.max_rows_adj = cast(int, self.max_rows_adj) 

890 nrows = self.max_rows_adj + 1 

891 else: 

892 nrows = len(self.frame) 

893 

894 str_lst = [] 

895 st = 0 

896 for i, ed in enumerate(col_bins): 

897 row = strcols[st:ed] 

898 if self.index: 

899 row.insert(0, idx) 

900 if nbins > 1: 

901 if ed <= len(strcols) and i < nbins - 1: 

902 row.append([" \\"] + [" "] * (nrows - 1)) 

903 else: 

904 row.append([" "] * nrows) 

905 str_lst.append(self.adj.adjoin(adjoin_width, *row)) 

906 st = ed 

907 return "\n\n".join(str_lst) 

908 

909 def to_string( 

910 self, 

911 buf: Optional[FilePathOrBuffer[str]] = None, 

912 encoding: Optional[str] = None, 

913 ) -> Optional[str]: 

914 return self.get_result(buf=buf, encoding=encoding) 

915 

916 def to_latex( 

917 self, 

918 buf: Optional[FilePathOrBuffer[str]] = None, 

919 column_format: Optional[str] = None, 

920 longtable: bool = False, 

921 encoding: Optional[str] = None, 

922 multicolumn: bool = False, 

923 multicolumn_format: Optional[str] = None, 

924 multirow: bool = False, 

925 caption: Optional[str] = None, 

926 label: Optional[str] = None, 

927 ) -> Optional[str]: 

928 """ 

929 Render a DataFrame to a LaTeX tabular/longtable environment output. 

930 """ 

931 

932 from pandas.io.formats.latex import LatexFormatter 

933 

934 return LatexFormatter( 

935 self, 

936 column_format=column_format, 

937 longtable=longtable, 

938 multicolumn=multicolumn, 

939 multicolumn_format=multicolumn_format, 

940 multirow=multirow, 

941 caption=caption, 

942 label=label, 

943 ).get_result(buf=buf, encoding=encoding) 

944 

945 def _format_col(self, i: int) -> List[str]: 

946 frame = self.tr_frame 

947 formatter = self._get_formatter(i) 

948 return format_array( 

949 frame.iloc[:, i]._values, 

950 formatter, 

951 float_format=self.float_format, 

952 na_rep=self.na_rep, 

953 space=self.col_space, 

954 decimal=self.decimal, 

955 ) 

956 

957 def to_html( 

958 self, 

959 buf: Optional[FilePathOrBuffer[str]] = None, 

960 encoding: Optional[str] = None, 

961 classes: Optional[Union[str, List, Tuple]] = None, 

962 notebook: bool = False, 

963 border: Optional[int] = None, 

964 ) -> Optional[str]: 

965 """ 

966 Render a DataFrame to a html table. 

967 

968 Parameters 

969 ---------- 

970 classes : str or list-like 

971 classes to include in the `class` attribute of the opening 

972 ``<table>`` tag, in addition to the default "dataframe". 

973 notebook : {True, False}, optional, default False 

974 Whether the generated HTML is for IPython Notebook. 

975 border : int 

976 A ``border=border`` attribute is included in the opening 

977 ``<table>`` tag. Default ``pd.options.display.html.border``. 

978 """ 

979 from pandas.io.formats.html import HTMLFormatter, NotebookFormatter 

980 

981 Klass = NotebookFormatter if notebook else HTMLFormatter 

982 return Klass(self, classes=classes, border=border).get_result( 

983 buf=buf, encoding=encoding 

984 ) 

985 

def _get_formatted_column_labels(self, frame: "DataFrame") -> List[List[str]]:
    """
    Build the header strings for the columns of ``frame``.

    Parameters
    ----------
    frame : DataFrame
        Frame whose ``columns`` are rendered. Note that the dtypes used to
        decide on leading-space padding are read from ``self.frame``.

    Returns
    -------
    List[List[str]]
        One inner list per column, holding one string per column level.
    """
    from pandas.core.indexes.multi import _sparsify

    columns = frame.columns

    if not isinstance(columns, ABCMultiIndex):
        labels = columns.format()
        wants_space = dict(zip(labels, map(is_numeric_dtype, self.frame.dtypes)))
        # A numeric column without a custom formatter gets a leading space so
        # the header lines up with the values.
        return [
            [
                " " + label
                if not self._get_formatter(pos) and wants_space[label]
                else label
            ]
            for pos, (_, label) in enumerate(zip(columns, labels))
        ]

    level_labels = columns.format(sparsify=False, adjoin=False)
    column_keys = list(zip(*level_labels))
    dtype_values = self.frame.dtypes._values

    # if we have a Float level, they don't use leading space at all
    # NOTE(review): ``is_floating`` is referenced here, not called, so each
    # element is a bound method and therefore truthy -- confirm whether a
    # call was intended before changing it, since it alters the output.
    restrict_formatting = any(level.is_floating for level in columns.levels)
    wants_space = dict(zip(column_keys, map(is_numeric_dtype, dtype_values)))

    def space_format(column_key, label):
        pad = (
            label not in self.formatters
            and wants_space[column_key]
            and not restrict_formatting
        )
        return " " + label if pad else label

    str_columns = list(
        zip(*[[space_format(key, label) for label in key] for key in column_keys])
    )
    if self.sparsify and len(str_columns):
        str_columns = _sparsify(str_columns)

    return [list(per_column) for per_column in zip(*str_columns)]

1026 

@property
def has_index_names(self) -> bool:
    """Whether the row index of ``self.frame`` carries any name."""
    row_index = self.frame.index
    return _has_names(row_index)

1030 

@property
def has_column_names(self) -> bool:
    """Whether the columns of ``self.frame`` carry any name."""
    column_index = self.frame.columns
    return _has_names(column_index)

1034 

@property
def show_row_idx_names(self) -> bool:
    """
    Whether row index names are rendered.

    True only when the index has names, the index itself is shown and
    showing index names is enabled.
    """
    conditions = (self.has_index_names, self.index, self.show_index_names)
    return all(conditions)

1038 

@property
def show_col_idx_names(self) -> bool:
    """
    Whether column index names are rendered.

    True only when the columns have names, showing index names is enabled
    and a header is being written.
    """
    conditions = (self.has_column_names, self.show_index_names, self.header)
    return all(conditions)

1042 

def _get_formatted_index(self, frame: "DataFrame") -> List[str]:
    """
    Render the row index of ``frame`` as a list of fixed-width strings.

    Parameters
    ----------
    frame : DataFrame
        Frame whose index (and column level count) is used.

    Returns
    -------
    List[str]
        The adjoined index rows, preceded by one header row per column
        level when ``self.header`` is truthy.
    """
    # Note: this is only used by to_string() and to_latex(), not by
    # to_html(). so safe to cast col_space here.
    self.col_space = cast(int, self.col_space)
    row_index = frame.index
    columns = frame.columns
    index_formatter = self._get_formatter("__index__")

    if isinstance(row_index, ABCMultiIndex):
        level_strings = row_index.format(
            sparsify=self.sparsify,
            adjoin=False,
            names=self.show_row_idx_names,
            formatter=index_formatter,
        )
    else:
        level_strings = [
            row_index.format(name=self.show_row_idx_names, formatter=index_formatter)
        ]

    padded_levels = []
    for level in level_strings:
        fixed = _make_fixed_width(
            list(level), justify="left", minimum=(self.col_space or 0), adj=self.adj
        )
        padded_levels.append(tuple(fixed))

    body_rows = self.adj.adjoin(1, *padded_levels).split("\n")

    if not self.header:
        return body_rows

    # empty space for columns
    if self.show_col_idx_names:
        header_rows = ["{x}".format(x=x) for x in self._get_column_name_list()]
    else:
        header_rows = [""] * columns.nlevels

    return header_rows + body_rows

1082 

1083 def _get_column_name_list(self) -> List[str]: 

1084 names: List[str] = [] 

1085 columns = self.frame.columns 

1086 if isinstance(columns, ABCMultiIndex): 

1087 names.extend("" if name is None else name for name in columns.names) 

1088 else: 

1089 names.append("" if columns.name is None else columns.name) 

1090 return names 

1091 

1092 

1093# ---------------------------------------------------------------------- 

1094# Array formatters 

1095 

1096 

def format_array(
    values: Any,
    formatter: Optional[Callable],
    float_format: Optional[float_format_type] = None,
    na_rep: str = "NaN",
    digits: Optional[int] = None,
    space: Optional[Union[str, int]] = None,
    justify: str = "right",
    decimal: str = ".",
    leading_space: Optional[bool] = None,
) -> List[str]:
    """
    Format an array for printing.

    The formatter class is picked from ``values.dtype`` and instantiated
    with the remaining arguments.

    Parameters
    ----------
    values
        Array-like with a ``dtype`` attribute.
    formatter
        Passed through to the formatter class.
    float_format
        Defaults to the ``display.float_format`` option when None.
    na_rep
        Passed through to the formatter class.
    digits
        Defaults to the ``display.precision`` option when None.
    space
        Defaults to the ``display.column_space`` option when None.
    justify
        Passed through to the formatter class.
    decimal
        Passed through to the formatter class.
    leading_space : bool, optional
        Whether the array should be formatted with a leading space.
        When an array as a column of a Series or DataFrame, we do want
        the leading space to pad between columns.

        When formatting an Index subclass
        (e.g. IntervalIndex._format_native_types), we don't want the
        leading space since it should be left-aligned.

    Returns
    -------
    List[str]
    """
    dtype = values.dtype

    # Ordered (predicate, argument, formatter class); the first match wins.
    candidates = (
        (is_datetime64_dtype, dtype, Datetime64Formatter),
        (is_datetime64tz_dtype, values, Datetime64TZFormatter),
        (is_timedelta64_dtype, dtype, Timedelta64Formatter),
        (is_extension_array_dtype, dtype, ExtensionArrayFormatter),
        (
            lambda d: is_float_dtype(d) or is_complex_dtype(d),
            dtype,
            FloatArrayFormatter,
        ),
        (is_integer_dtype, dtype, IntArrayFormatter),
    )

    fmt_klass: Type[GenericArrayFormatter] = GenericArrayFormatter
    for matches, subject, klass in candidates:
        if matches(subject):
            fmt_klass = klass
            break

    # Unspecified display settings fall back to the global options.
    if space is None:
        space = get_option("display.column_space")
    if float_format is None:
        float_format = get_option("display.float_format")
    if digits is None:
        digits = get_option("display.precision")

    return fmt_klass(
        values,
        digits=digits,
        na_rep=na_rep,
        float_format=float_format,
        formatter=formatter,
        space=space,
        justify=justify,
        decimal=decimal,
        leading_space=leading_space,
    ).get_result()

1173 

1174 

class GenericArrayFormatter:
    """
    Base formatter turning an array of values into a list of strings.

    Subclasses override ``_format_strings``; ``get_result`` then makes the
    produced strings fixed width according to ``justify``.
    """

    def __init__(
        self,
        values: Any,
        digits: int = 7,
        formatter: Optional[Callable] = None,
        na_rep: str = "NaN",
        space: Union[str, int] = 12,
        float_format: Optional[float_format_type] = None,
        justify: str = "right",
        decimal: str = ".",
        quoting: Optional[int] = None,
        fixed_width: bool = True,
        leading_space: Optional[bool] = None,
    ):
        self.values = values
        self.formatter = formatter
        self.float_format = float_format
        self.na_rep = na_rep
        self.digits = digits
        self.space = space
        self.justify = justify
        self.decimal = decimal
        self.quoting = quoting
        self.fixed_width = fixed_width
        self.leading_space = leading_space

    def get_result(self) -> List[str]:
        """Return the formatted values, padded to a common width."""
        return _make_fixed_width(self._format_strings(), self.justify)

    def _format_strings(self) -> List[str]:
        """Format each element of ``self.values`` into a string."""
        float_format = self.float_format
        if float_format is None:
            float_format = get_option("display.float_format")
            if float_format is None:
                fmt_str = "{{x: .{prec:d}g}}".format(
                    prec=get_option("display.precision")
                )
                float_format = lambda x: fmt_str.format(x=x)

        if self.formatter is not None:
            formatter = self.formatter
        else:
            formatter = lambda x: pprint_thing(x, escape_chars=("\t", "\r", "\n"))

        def _format(x):
            is_missing = self.na_rep is not None and is_scalar(x) and isna(x)
            if is_missing:
                try:
                    # try block for np.isnat specifically
                    # determine na_rep if x is None or NaT-like
                    if x is None:
                        return "None"
                    if x is NA:
                        return str(NA)
                    if x is NaT or np.isnat(x):
                        return "NaT"
                except (TypeError, ValueError):
                    # np.isnat only handles datetime or timedelta objects
                    pass
                return self.na_rep
            if isinstance(x, PandasObject):
                return "{x}".format(x=x)
            # object dtype
            return "{x}".format(x=formatter(x))

        vals = self.values
        if isinstance(vals, Index):
            vals = vals._values
        elif isinstance(vals, ABCSparseArray):
            vals = vals.values

        is_float_type = lib.map_infer(vals, is_float) & notna(vals)
        leading_space = self.leading_space
        if leading_space is None:
            leading_space = is_float_type.any()

        fmt_values = []
        for is_flt, v in zip(is_float_type, vals):
            if is_flt:
                fmt_values.append(float_format(v))
            elif leading_space is False:
                # False specifically, so that the default is
                # to include a space if we get here.
                fmt_values.append("{v}".format(v=_format(v)))
            else:
                fmt_values.append(" {v}".format(v=_format(v)))

        return fmt_values

1271 

1272 

class FloatArrayFormatter(GenericArrayFormatter):
    """
    Array formatter for float and complex values.

    Handles the ``float_format``, ``decimal`` and ``display.chop_threshold``
    settings and switches to exponent notation when the fixed-width
    rendering would hide small values or become too wide.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        # float_format is expected to be a string
        # formatter should be used to pass a function
        custom_float_format = self.float_format is not None and self.formatter is None
        if custom_float_format:
            # GH21625, GH22270
            self.fixed_width = False
            if callable(self.float_format):
                self.formatter = self.float_format
                self.float_format = None

    def _value_formatter(
        self,
        float_format: Optional[float_format_type] = None,
        threshold: Optional[Union[float, int]] = None,
    ) -> Callable:
        """
        Return a function to be applied on each value to format it.

        Parameters
        ----------
        float_format : callable, optional
            Called as ``float_format(value=v)``; supersedes
            ``self.float_format`` when given.
        threshold : float or int, optional
            Values whose absolute value does not exceed it are formatted
            as ``0.0``.
        """
        # the float_format parameter supersedes self.float_format
        if float_format is None:
            float_format = self.float_format

        # we are going to compose different functions, to first convert to
        # a string, then replace the decimal symbol, and finally chop according
        # to the threshold

        # when there is no float_format, we use str instead of '%g'
        # because str(0.0) = '0.0' while '%g' % 0.0 = '0'
        if float_format:
            to_text = lambda v: float_format(value=v)
        else:
            to_text = str

        def base_formatter(v):
            return to_text(v) if notna(v) else self.na_rep

        if self.decimal == ".":
            decimal_formatter = base_formatter
        else:

            def decimal_formatter(v):
                return base_formatter(v).replace(".", self.decimal, 1)

        if threshold is None:
            return decimal_formatter

        def formatter(value):
            if not notna(value):
                return self.na_rep
            if abs(value) > threshold:
                return decimal_formatter(value)
            return decimal_formatter(0.0)

        return formatter

    def get_result_as_array(self) -> np.ndarray:
        """
        Returns the float values converted into strings using
        the parameters given at initialisation, as a numpy array
        """
        if self.formatter is not None:
            return np.array([self.formatter(x) for x in self.values])

        threshold = get_option("display.chop_threshold") if self.fixed_width else None

        # if we have a fixed_width, we'll need to try different float_format
        def format_values_with(float_format):
            formatter = self._value_formatter(float_format, threshold)

            # default formatter leaves a space to the left when formatting
            # floats, must be consistent for left-justifying NaNs (GH #25061)
            na_rep = " " + self.na_rep if self.justify == "left" else self.na_rep

            # separate the wheat from the chaff
            values = self.values
            is_complex = is_complex_dtype(values)
            mask = isna(values)
            if hasattr(values, "to_dense"):  # sparse numpy ndarray
                values = values.to_dense()
            values = np.array(values, dtype="object")
            values[mask] = na_rep
            imask = (~mask).ravel()
            values.flat[imask] = np.array(
                [formatter(val) for val in values.ravel()[imask]]
            )

            if not self.fixed_width:
                return values

            trim = _trim_zeros_complex if is_complex else _trim_zeros_float
            return np.asarray(trim(values, na_rep), dtype="object")

        # There is a special default string when we are fixed-width
        # The default is otherwise to use str instead of a formatting string
        float_format: Optional[float_format_type]
        if self.float_format is not None:
            float_format = lambda value: self.float_format % value
        elif self.fixed_width:
            float_format = partial("{value: .{digits:d}f}".format, digits=self.digits)
        else:
            float_format = None

        formatted_values = format_values_with(float_format)

        if not self.fixed_width:
            return formatted_values

        # we need do convert to engineering format if some values are too small
        # and would appear as 0, or if some values are too big and take too
        # much space
        too_long = (
            len(formatted_values) > 0
            and max(len(x) for x in formatted_values) > self.digits + 6
        )

        with np.errstate(invalid="ignore"):
            abs_vals = np.abs(self.values)
            # this is pretty arbitrary for now
            # large values: more that 8 characters including decimal symbol
            # and first digit, hence > 1e6
            has_large_values = (abs_vals > 1e6).any()
            has_small_values = (
                (abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)
            ).any()

        if has_small_values or (too_long and has_large_values):
            float_format = partial("{value: .{digits:d}e}".format, digits=self.digits)
            formatted_values = format_values_with(float_format)

        return formatted_values

    def _format_strings(self) -> List[str]:
        """Format every value, using ``self.formatter`` directly when set."""
        # shortcut
        if self.formatter is None:
            return list(self.get_result_as_array())
        return [self.formatter(x) for x in self.values]

1437 

1438 

class IntArrayFormatter(GenericArrayFormatter):
    """Array formatter for integer values."""

    def _format_strings(self) -> List[str]:
        """Format each value with ``self.formatter`` or the ``" d"`` spec."""
        if self.formatter:
            formatter = self.formatter
        else:

            def formatter(x):
                return "{x: d}".format(x=x)

        return [formatter(value) for value in self.values]

1444 

1445 

class Datetime64Formatter(GenericArrayFormatter):
    """Array formatter for datetime64 values without a timezone."""

    def __init__(
        self,
        values: Union[np.ndarray, "Series", DatetimeIndex, DatetimeArray],
        nat_rep: str = "NaT",
        date_format: None = None,
        **kwargs,
    ):
        super().__init__(values, **kwargs)
        self.date_format = date_format
        self.nat_rep = nat_rep

    def _format_strings(self) -> List[str]:
        """ we by definition have DO NOT have a TZ """
        values = self.values
        if not isinstance(values, DatetimeIndex):
            values = DatetimeIndex(values)

        custom = self.formatter
        if custom is not None and callable(custom):
            return [custom(x) for x in values]

        date_format = _get_format_datetime64_from_values(values, self.date_format)
        formatted = format_array_from_datetime(
            values.asi8.ravel(), format=date_format, na_rep=self.nat_rep
        )
        return formatted.reshape(values.shape).tolist()

1475 

1476 

class ExtensionArrayFormatter(GenericArrayFormatter):
    """Array formatter for extension arrays."""

    def _format_strings(self) -> List[str]:
        """
        Format the values with the array's own boxed formatter.

        The values are converted to a plain array and handed to
        ``format_array`` together with this formatter's display settings.
        """
        values = self.values
        if isinstance(values, (ABCIndexClass, ABCSeries)):
            values = values._values

        formatter = values._formatter(boxed=True)

        # Categorical is special for now, so that we can preserve tzinfo
        if is_categorical_dtype(values.dtype):
            array = values._internal_get_values()
        else:
            array = np.asarray(values)

        return format_array(
            array,
            formatter,
            float_format=self.float_format,
            na_rep=self.na_rep,
            digits=self.digits,
            space=self.space,
            justify=self.justify,
            leading_space=self.leading_space,
        )

1502 

1503 

def format_percentiles(
    percentiles: Union[
        np.ndarray, List[Union[int, float]], List[float], List[Union[str, float]]
    ]
) -> List[str]:
    """
    Outputs rounded and formatted percentiles.

    Parameters
    ----------
    percentiles : list-like, containing floats from interval [0,1]

    Returns
    -------
    formatted : list of strings

    Raises
    ------
    ValueError
        If ``percentiles`` is not numeric or any value lies outside [0, 1]
        (this includes NaN).

    Notes
    -----
    Rounding precision is chosen so that: (1) if any two elements of
    ``percentiles`` differ, they remain different after rounding
    (2) no entry is *rounded* to 0% or 100%.
    Any non-integer is always rounded to at least 1 decimal place.

    Examples
    --------
    Keeps all entries different after rounding:

    >>> format_percentiles([0.01999, 0.02001, 0.5, 0.666666, 0.9999])
    ['1.999%', '2.001%', '50%', '66.667%', '99.99%']

    No element is rounded to 0% or 100% (unless already equal to it).
    Duplicates are allowed:

    >>> format_percentiles([0, 0.5, 0.02001, 0.5, 0.666666, 0.9999])
    ['0%', '50%', '2.0%', '50%', '66.67%', '99.99%']

    Values that are whole percentages up to floating point error are
    shown as integers:

    >>> format_percentiles([0.29])
    ['29%']
    """
    percentiles = np.asarray(percentiles)

    # It checks for np.NaN as well
    with np.errstate(invalid="ignore"):
        if (
            not is_numeric_dtype(percentiles)
            or not np.all(percentiles >= 0)
            or not np.all(percentiles <= 1)
        ):
            raise ValueError("percentiles should all be in the interval [0,1]")

    percentiles = 100 * percentiles
    if percentiles.size == 0:
        return []

    unique_pcts = np.unique(percentiles)
    to_begin = unique_pcts[0] if unique_pcts[0] > 0 else None
    to_end = 100 - unique_pcts[-1] if unique_pcts[-1] < 100 else None

    # Least precision that keeps percentiles unique after rounding
    prec = -np.floor(
        np.log10(np.min(np.ediff1d(unique_pcts, to_begin=to_begin, to_end=to_end)))
    ).astype(int)
    prec = max(1, prec)

    # Round before truncating to int: 100 * 0.29 is 28.999999999999996, which
    # a bare astype(int) would turn into 28 and so miss as a whole percentage.
    # Rounding to ``prec`` (not to 0 decimals) keeps values such as 99.9999
    # from collapsing onto 100.
    rounded_ints = percentiles.round(prec).astype(int)
    int_idx = np.isclose(rounded_ints, percentiles)

    if np.all(int_idx):
        return [i + "%" for i in rounded_ints.astype(str)]

    out = np.empty_like(percentiles, dtype=object)
    out[int_idx] = rounded_ints[int_idx].astype(str)
    out[~int_idx] = percentiles[~int_idx].round(prec).astype(str)
    return [i + "%" for i in out]

1572 

1573 

1574def _is_dates_only( 

1575 values: Union[np.ndarray, DatetimeArray, Index, DatetimeIndex] 

1576) -> bool: 

1577 # return a boolean if we are only dates (and don't have a timezone) 

1578 assert values.ndim == 1 

1579 

1580 values = DatetimeIndex(values) 

1581 if values.tz is not None: 

1582 return False 

1583 

1584 values_int = values.asi8 

1585 consider_values = values_int != iNaT 

1586 one_day_nanos = 86400 * 1e9 

1587 even_days = ( 

1588 np.logical_and(consider_values, values_int % int(one_day_nanos) != 0).sum() == 0 

1589 ) 

1590 if even_days: 

1591 return True 

1592 return False 

1593 

1594 

1595def _format_datetime64( 

1596 x: Union[NaTType, Timestamp], tz: Optional[tzinfo] = None, nat_rep: str = "NaT" 

1597) -> str: 

1598 if x is None or (is_scalar(x) and isna(x)): 

1599 return nat_rep 

1600 

1601 if tz is not None or not isinstance(x, Timestamp): 

1602 if getattr(x, "tzinfo", None) is not None: 

1603 x = Timestamp(x).tz_convert(tz) 

1604 else: 

1605 x = Timestamp(x).tz_localize(tz) 

1606 

1607 return str(x) 

1608 

1609 

1610def _format_datetime64_dateonly( 

1611 x: Union[NaTType, Timestamp], nat_rep: str = "NaT", date_format: None = None 

1612) -> str: 

1613 if x is None or (is_scalar(x) and isna(x)): 

1614 return nat_rep 

1615 

1616 if not isinstance(x, Timestamp): 

1617 x = Timestamp(x) 

1618 

1619 if date_format: 

1620 return x.strftime(date_format) 

1621 else: 

1622 return x._date_repr 

1623 

1624 

1625def _get_format_datetime64( 

1626 is_dates_only: bool, nat_rep: str = "NaT", date_format: None = None 

1627) -> Callable: 

1628 

1629 if is_dates_only: 

1630 return lambda x, tz=None: _format_datetime64_dateonly( 

1631 x, nat_rep=nat_rep, date_format=date_format 

1632 ) 

1633 else: 

1634 return lambda x, tz=None: _format_datetime64(x, tz=tz, nat_rep=nat_rep) 

1635 

1636 

def _get_format_datetime64_from_values(
    values: Union[np.ndarray, DatetimeArray, DatetimeIndex], date_format: Optional[str]
) -> Optional[str]:
    """
    Given values and a date_format, return a string format.

    When the values hold dates only, a missing ``date_format`` falls back
    to ``"%Y-%m-%d"``; otherwise ``date_format`` is returned unchanged.
    """
    if isinstance(values, np.ndarray) and values.ndim > 1:
        # We don't actually care about the order of values, and DatetimeIndex
        # only accepts 1D values
        values = values.ravel()

    if _is_dates_only(values):
        return date_format or "%Y-%m-%d"
    return date_format

1651 

1652 

class Datetime64TZFormatter(Datetime64Formatter):
    """Array formatter for timezone-aware datetime64 values."""

    def _format_strings(self) -> List[str]:
        """ we by definition have a TZ """
        boxed = self.values.astype(object)
        dates_only = _is_dates_only(boxed)
        formatter = self.formatter or _get_format_datetime64(
            dates_only, date_format=self.date_format
        )
        return [formatter(value) for value in boxed]

1665 

1666 

class Timedelta64Formatter(GenericArrayFormatter):
    """Array formatter for timedelta64 values."""

    def __init__(
        self,
        values: Union[np.ndarray, TimedeltaIndex],
        nat_rep: str = "NaT",
        box: bool = False,
        **kwargs,
    ):
        super().__init__(values, **kwargs)
        self.box = box
        self.nat_rep = nat_rep

    def _format_strings(self) -> List[str]:
        """Format each value with ``self.formatter`` or a shared timedelta format."""
        formatter = self.formatter
        if not formatter:
            formatter = _get_format_timedelta64(
                self.values, nat_rep=self.nat_rep, box=self.box
            )
        return [formatter(value) for value in self.values]

1684 

1685 

1686def _get_format_timedelta64( 

1687 values: Union[np.ndarray, TimedeltaIndex, TimedeltaArray], 

1688 nat_rep: str = "NaT", 

1689 box: bool = False, 

1690) -> Callable: 

1691 """ 

1692 Return a formatter function for a range of timedeltas. 

1693 These will all have the same format argument 

1694 

1695 If box, then show the return in quotes 

1696 """ 

1697 

1698 values_int = values.astype(np.int64) 

1699 

1700 consider_values = values_int != iNaT 

1701 

1702 one_day_nanos = 86400 * 1e9 

1703 even_days = ( 

1704 np.logical_and(consider_values, values_int % one_day_nanos != 0).sum() == 0 

1705 ) 

1706 all_sub_day = ( 

1707 np.logical_and(consider_values, np.abs(values_int) >= one_day_nanos).sum() == 0 

1708 ) 

1709 

1710 if even_days: 

1711 format = None 

1712 elif all_sub_day: 

1713 format = "sub_day" 

1714 else: 

1715 format = "long" 

1716 

1717 def _formatter(x): 

1718 if x is None or (is_scalar(x) and isna(x)): 

1719 return nat_rep 

1720 

1721 if not isinstance(x, Timedelta): 

1722 x = Timedelta(x) 

1723 result = x._repr_base(format=format) 

1724 if box: 

1725 result = "'{res}'".format(res=result) 

1726 return result 

1727 

1728 return _formatter 

1729 

1730 

def _make_fixed_width(
    strings: List[str],
    justify: str = "right",
    minimum: Optional[int] = None,
    adj: Optional[TextAdjustment] = None,
) -> List[str]:
    """
    Justify ``strings`` to a common width.

    The width is the longest string (at least ``minimum``), capped by the
    ``display.max_colwidth`` option. Strings longer than the width are cut
    and end in ``"..."`` when that option is greater than 3. Empty input,
    or ``justify == "all"``, returns ``strings`` unchanged.
    """
    if len(strings) == 0 or justify == "all":
        return strings

    if adj is None:
        adj = _get_adjustment()

    width = max(adj.len(text) for text in strings)
    if minimum is not None:
        width = max(minimum, width)

    conf_max = get_option("display.max_colwidth")
    if conf_max is not None and width > conf_max:
        width = conf_max

    # Truncation only applies when there is room for the "..." marker.
    may_truncate = conf_max is not None and conf_max > 3

    clipped = [
        text[: width - 3] + "..." if may_truncate and adj.len(text) > width else text
        for text in strings
    ]
    return adj.justify(clipped, width, mode=justify)

1762 

1763 

def _trim_zeros_complex(str_complexes: np.ndarray, na_rep: str = "NaN") -> List[str]:
    """
    Split each complex string on ``j``, ``+`` and ``-`` (keeping the
    separators), run _trim_zeros_float over the pieces and join them back.
    """
    trimmed = []
    for text in str_complexes:
        pieces = re.split(r"([j+-])", text)
        trimmed.append("".join(_trim_zeros_float(pieces, na_rep)))
    return trimmed

1773 

1774 

1775def _trim_zeros_float( 

1776 str_floats: Union[np.ndarray, List[str]], na_rep: str = "NaN" 

1777) -> List[str]: 

1778 """ 

1779 Trims zeros, leaving just one before the decimal points if need be. 

1780 """ 

1781 trimmed = str_floats 

1782 

1783 def _is_number(x): 

1784 return x != na_rep and not x.endswith("inf") 

1785 

1786 def _cond(values): 

1787 finite = [x for x in values if _is_number(x)] 

1788 return ( 

1789 len(finite) > 0 

1790 and all(x.endswith("0") for x in finite) 

1791 and not (any(("e" in x) or ("E" in x) for x in finite)) 

1792 ) 

1793 

1794 while _cond(trimmed): 

1795 trimmed = [x[:-1] if _is_number(x) else x for x in trimmed] 

1796 

1797 # leave one 0 after the decimal points if need be. 

1798 return [x + "0" if x.endswith(".") and _is_number(x) else x for x in trimmed] 

1799 

1800 

1801def _has_names(index: Index) -> bool: 

1802 if isinstance(index, ABCMultiIndex): 

1803 return com.any_not_none(*index.names) 

1804 else: 

1805 return index.name is not None 

1806 

1807 

class EngFormatter:
    """
    Formats float values according to engineering format.

    Based on matplotlib.ticker.EngFormatter

    Parameters
    ----------
    accuracy : int, optional
        Number of digits after the decimal point. With None the mantissa
        is rendered with the general ``g`` format.
    use_eng_prefix : bool, default False
        Append the SI prefix letter instead of an ``E+NN`` / ``E-NN``
        exponent.
    """

    # The SI engineering prefixes
    ENG_PREFIXES = {
        -24: "y",
        -21: "z",
        -18: "a",
        -15: "f",
        -12: "p",
        -9: "n",
        -6: "u",
        -3: "m",
        0: "",
        3: "k",
        6: "M",
        9: "G",
        12: "T",
        15: "P",
        18: "E",
        21: "Z",
        24: "Y",
    }

    def __init__(self, accuracy: Optional[int] = None, use_eng_prefix: bool = False):
        self.accuracy = accuracy
        self.use_eng_prefix = use_eng_prefix

    def __call__(self, num: Union[int, float]) -> str:
        """
        Format a number in engineering notation, appending a letter (or an
        exponent) representing the power of 1000 of the original number.

        Parameters
        ----------
        num : int, float or str
            The value to represent: either a numeric value or a string that
            can be converted to one (as per the decimal.Decimal constructor).

        Returns
        -------
        str
            Engineering formatted string. NaN gives ``"NaN"``; infinities
            give ``"inf"`` or ``"-inf"``.

        Examples
        --------
        >>> EngFormatter(accuracy=0, use_eng_prefix=True)(0)
        ' 0'
        >>> EngFormatter(accuracy=1, use_eng_prefix=True)(1000000)
        ' 1.0M'
        >>> EngFormatter(accuracy=2, use_eng_prefix=False)("-1e-6")
        '-1.00E-06'
        """
        dnum = decimal.Decimal(str(num))

        if dnum.is_nan():
            return "NaN"

        if dnum.is_infinite():
            # Keep the sign so that -inf is not shown as +inf.
            return "-inf" if dnum < 0 else "inf"

        sign = 1

        if dnum < 0:  # pragma: no cover
            sign = -1
            dnum = -dnum

        if dnum != 0:
            pow10 = decimal.Decimal(int(math.floor(dnum.log10() / 3) * 3))
        else:
            pow10 = decimal.Decimal(0)

        # Clamp the exponent to the range covered by ENG_PREFIXES.
        pow10 = pow10.min(max(self.ENG_PREFIXES.keys()))
        pow10 = pow10.max(min(self.ENG_PREFIXES.keys()))
        int_pow10 = int(pow10)

        if self.use_eng_prefix:
            prefix = self.ENG_PREFIXES[int_pow10]
        elif int_pow10 < 0:
            prefix = "E-{pow10:02d}".format(pow10=-int_pow10)
        else:
            prefix = "E+{pow10:02d}".format(pow10=int_pow10)

        mant = sign * dnum / (10 ** pow10)

        if self.accuracy is None:  # pragma: no cover
            format_str = "{mant: g}{prefix}"
        else:
            format_str = "{{mant: .{acc:d}f}}{{prefix}}".format(acc=self.accuracy)

        return format_str.format(mant=mant, prefix=prefix)

1902 

1903 

def set_eng_float_format(accuracy: int = 3, use_eng_prefix: bool = False) -> None:
    """
    Alter default behavior on how float is formatted in DataFrame.
    Format float in engineering format. By accuracy, we mean the number of
    decimal digits after the floating point.

    Sets the ``display.float_format`` option to an EngFormatter and the
    ``display.column_space`` option to ``max(12, accuracy + 9)``.

    See also EngFormatter.
    """
    eng_formatter = EngFormatter(accuracy, use_eng_prefix)
    column_space = max(12, accuracy + 9)

    set_option("display.float_format", eng_formatter)
    set_option("display.column_space", column_space)

1915 

1916 

1917def _binify(cols: List[int], line_width: int) -> List[int]: 

1918 adjoin_width = 1 

1919 bins = [] 

1920 curr_width = 0 

1921 i_last_column = len(cols) - 1 

1922 for i, w in enumerate(cols): 

1923 w_adjoined = w + adjoin_width 

1924 curr_width += w_adjoined 

1925 if i_last_column == i: 

1926 wrap = curr_width + 1 > line_width and i > 0 

1927 else: 

1928 wrap = curr_width + 2 > line_width and i > 0 

1929 if wrap: 

1930 bins.append(i) 

1931 curr_width = w_adjoined 

1932 

1933 bins.append(len(cols)) 

1934 return bins 

1935 

1936 

def get_level_lengths(
    levels: Any, sentinel: Union[bool, object, str] = ""
) -> List[Dict[int, int]]:
    """
    For each index in each level the function returns lengths of indexes.

    Parameters
    ----------
    levels : list of lists
        List of values for each level.
    sentinel : string, optional
        Value which states that no new index starts on there.

    Returns
    -------
    Returns list of maps. For each level returns map of indexes (key is index
    in row and value is length of index).
    """
    if len(levels) == 0:
        return []

    # A position may only continue a span while no earlier level has
    # started a new entry there.
    still_blank = [True] * len(levels[0])

    result = []
    for level in levels:
        span_start = 0
        spans = {}
        for pos, key in enumerate(level):
            if still_blank[pos] and key == sentinel:
                continue
            still_blank[pos] = False
            spans[span_start] = pos - span_start
            span_start = pos

        spans[span_start] = len(level) - span_start
        result.append(spans)

    return result

1977 

1978 

def buffer_put_lines(buf: IO[str], lines: List[str]) -> None:
    """
    Appends lines to a buffer.

    Parameters
    ----------
    buf
        The buffer to write to
    lines
        The lines to append. Every element is converted with ``str`` and
        the results are written joined by newlines, without a trailing
        newline.
    """
    # Always stringify: converting only when some element already is a str
    # made the join fail whenever none of the elements was one.
    buf.write("\n".join(str(line) for line in lines))
1991 lines = [str(x) for x in lines] 

1992 buf.write("\n".join(lines))