Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/pandas/io/formats/format.py : 15%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""
2Internal module for formatting output data in csv, html,
3and latex files. This module also applies to display formatting.
4"""
6from contextlib import contextmanager
7from datetime import tzinfo
8import decimal
9from functools import partial
10from io import StringIO
11import math
12import re
13from shutil import get_terminal_size
14from typing import (
15 IO,
16 TYPE_CHECKING,
17 Any,
18 Callable,
19 Dict,
20 Iterable,
21 List,
22 Mapping,
23 Optional,
24 Sequence,
25 Tuple,
26 Type,
27 Union,
28 cast,
29)
30from unicodedata import east_asian_width
32import numpy as np
34from pandas._config.config import get_option, set_option
36from pandas._libs import lib
37from pandas._libs.missing import NA
38from pandas._libs.tslib import format_array_from_datetime
39from pandas._libs.tslibs import NaT, Timedelta, Timestamp, iNaT
40from pandas._libs.tslibs.nattype import NaTType
41from pandas._typing import FilePathOrBuffer
42from pandas.errors import AbstractMethodError
44from pandas.core.dtypes.common import (
45 is_categorical_dtype,
46 is_complex_dtype,
47 is_datetime64_dtype,
48 is_datetime64tz_dtype,
49 is_extension_array_dtype,
50 is_float,
51 is_float_dtype,
52 is_integer,
53 is_integer_dtype,
54 is_list_like,
55 is_numeric_dtype,
56 is_scalar,
57 is_timedelta64_dtype,
58)
59from pandas.core.dtypes.generic import (
60 ABCIndexClass,
61 ABCMultiIndex,
62 ABCSeries,
63 ABCSparseArray,
64)
65from pandas.core.dtypes.missing import isna, notna
67from pandas.core.arrays.datetimes import DatetimeArray
68from pandas.core.arrays.timedeltas import TimedeltaArray
69from pandas.core.base import PandasObject
70import pandas.core.common as com
71from pandas.core.indexes.api import Index, ensure_index
72from pandas.core.indexes.datetimes import DatetimeIndex
73from pandas.core.indexes.timedeltas import TimedeltaIndex
75from pandas.io.common import stringify_path
76from pandas.io.formats.printing import adjoin, justify, pprint_thing
78if TYPE_CHECKING:
79 from pandas import Series, DataFrame, Categorical
# Accepted shapes for per-column formatter collections: a positional
# list/tuple of callables, or a mapping from column label/position to callable.
formatters_type = Union[
    List[Callable], Tuple[Callable, ...], Mapping[Union[str, int], Callable]
]
# A float format may be a format string, a callable or an EngFormatter.
float_format_type = Union[str, Callable, "EngFormatter"]

# Shared "Parameters" section. It contains %-style placeholders
# (col_space_type, col_space, header_type, header) that must be filled in
# before the text is shown to a user.
# NOTE(review): the indentation inside this template was reconstructed after
# stripping line-number residue from the text -- confirm against the
# docstrings it is spliced into.
common_docstring = """
        Parameters
        ----------
        buf : str, Path or StringIO-like, optional, default None
            Buffer to write to. If None, the output is returned as a string.
        columns : sequence, optional, default None
            The subset of columns to write. Writes all columns by default.
        col_space : %(col_space_type)s, optional
            %(col_space)s.
        header : %(header_type)s, optional
            %(header)s.
        index : bool, optional, default True
            Whether to print index (row) labels.
        na_rep : str, optional, default 'NaN'
            String representation of NAN to use.
        formatters : list, tuple or dict of one-param. functions, optional
            Formatter functions to apply to columns' elements by position or
            name.
            The result of each function must be a unicode string.
            List/tuple must be of length equal to the number of columns.
        float_format : one-parameter function, optional, default None
            Formatter function to apply to columns' elements if they are
            floats. The result of this function must be a unicode string.
        sparsify : bool, optional, default True
            Set to False for a DataFrame with a hierarchical index to print
            every multiindex key at each row.
        index_names : bool, optional, default True
            Prints the names of the indexes.
        justify : str, default None
            How to justify the column labels. If None uses the option from
            the print configuration (controlled by set_option), 'right' out
            of the box. Valid values are

            * left
            * right
            * center
            * justify
            * justify-all
            * start
            * end
            * inherit
            * match-parent
            * initial
            * unset.
        max_rows : int, optional
            Maximum number of rows to display in the console.
        min_rows : int, optional
            The number of rows to display in the console in a truncated repr
            (when number of rows is above `max_rows`).
        max_cols : int, optional
            Maximum number of columns to display in the console.
        show_dimensions : bool, default False
            Display DataFrame dimensions (number of rows by number of columns).
        decimal : str, default '.'
            Character recognized as decimal separator, e.g. ',' in Europe.
    """

# The same values that common_docstring lists as valid for ``justify``.
_VALID_JUSTIFY_PARAMETERS = (
    "left",
    "right",
    "center",
    "justify",
    "justify-all",
    "start",
    "end",
    "inherit",
    "match-parent",
    "initial",
    "unset",
)

# Shared "Returns" section, appended after common_docstring.
return_docstring = """
        Returns
        -------
        str or None
            If buf is None, returns the result as a string. Otherwise returns
            None.
    """
class CategoricalFormatter:
    """
    Build the text representation of a Categorical.

    Parameters
    ----------
    categorical : Categorical
        The values to render.
    buf : IO[str], optional
        Stored on the instance; a fresh StringIO is created when omitted.
    length : bool, default True
        Whether the footer reports the number of values.
    na_rep : str, default "NaN"
        Text used for missing values.
    footer : bool, default True
        Whether ``to_string`` appends the footer at all.
    """

    def __init__(
        self,
        categorical: "Categorical",
        buf: Optional[IO[str]] = None,
        length: bool = True,
        na_rep: str = "NaN",
        footer: bool = True,
    ):
        self.categorical = categorical
        self.buf = StringIO("") if buf is None else buf
        self.na_rep = na_rep
        self.length = length
        self.footer = footer

    def _get_footer(self) -> str:
        """
        Return the footer: an optional "Length: N" line followed by the
        categories description on its own line.
        """
        pieces = []
        if self.length:
            pieces.append("Length: {length}".format(length=len(self.categorical)))
        # Levels are added in a newline
        pieces.append(self.categorical._repr_categories_info())
        return "\n".join(pieces)

    def _get_formatted_values(self) -> List[str]:
        """
        Format the underlying values with the generic array formatter.
        """
        raw_values = self.categorical._internal_get_values()
        return format_array(raw_values, None, float_format=None, na_rep=self.na_rep)

    def to_string(self) -> str:
        """
        Render the values as ``[a, b, ...]`` followed by the optional footer.
        """
        if len(self.categorical) == 0:
            return self._get_footer() if self.footer else ""

        cells = [
            "{i}".format(i=value).strip() for value in self._get_formatted_values()
        ]
        lines = ["[" + ", ".join(cells) + "]"]

        if self.footer:
            footer_text = self._get_footer()
            if footer_text:
                lines.append(footer_text)

        return "\n".join(lines)
class SeriesFormatter:
    """
    Build the text representation of a Series.

    The series is truncated once at construction time (see
    ``_chk_truncate``); ``to_string`` then renders the truncated data plus an
    optional header and footer.
    """

    def __init__(
        self,
        series: "Series",
        buf: Optional[IO[str]] = None,
        length: Union[bool, str] = True,
        header: bool = True,
        index: bool = True,
        na_rep: str = "NaN",
        name: bool = False,
        float_format: Optional[str] = None,
        dtype: bool = True,
        max_rows: Optional[int] = None,
        min_rows: Optional[int] = None,
    ):
        self.series = series
        self.buf = StringIO() if buf is None else buf
        self.name = name
        self.na_rep = na_rep
        self.header = header
        self.length = length
        self.index = index
        self.max_rows = max_rows
        self.min_rows = min_rows

        # Fall back to the configured display option when not given.
        self.float_format = (
            get_option("display.float_format") if float_format is None else float_format
        )
        self.dtype = dtype
        self.adj = _get_adjustment()

        self._chk_truncate()

    def _chk_truncate(self) -> None:
        """
        Decide whether the series must be truncated and, if so, keep only the
        head and tail rows.

        Sets ``tr_series`` (the data to print), ``tr_row_num`` (number of rows
        kept before the gap, or None) and ``truncate_v``.
        """
        from pandas.core.reshape.concat import concat

        self.tr_row_num: Optional[int]

        data = self.series
        row_limit = self.max_rows
        row_floor = self.min_rows
        # truncation determined by max_rows, actual truncated number of rows
        # used below by min_rows
        truncate_v = row_limit and (len(data) > row_limit)

        if not truncate_v:
            self.tr_row_num = None
        else:
            row_limit = cast(int, row_limit)
            if row_floor:
                # if min_rows is set (not None or 0), set max_rows to minimum
                # of both
                row_limit = min(row_floor, row_limit)
            if row_limit == 1:
                kept = row_limit
                data = data.iloc[:row_limit]
            else:
                kept = row_limit // 2
                data = concat((data.iloc[:kept], data.iloc[-kept:]))
            self.tr_row_num = kept

        self.tr_series = data
        self.truncate_v = truncate_v

    def _get_footer(self) -> str:
        """
        Return the footer line(s): comma separated Freq / Name / Length /
        dtype entries, followed by the categories description on a new line
        for categorical data.
        """
        series_name = self.series.name
        entries = []

        if getattr(self.series.index, "freq", None) is not None:
            entries.append("Freq: {freq}".format(freq=self.series.index.freqstr))

        if self.name is not False and series_name is not None:
            printable = pprint_thing(series_name, escape_chars=("\t", "\r", "\n"))
            entries.append("Name: {sname}".format(sname=printable))

        if self.length is True or (self.length == "truncate" and self.truncate_v):
            entries.append("Length: {length}".format(length=len(self.series)))

        if self.dtype is not False and self.dtype is not None:
            dtype_name = getattr(self.tr_series.dtype, "name", None)
            if dtype_name:
                entries.append("dtype: {typ}".format(typ=pprint_thing(dtype_name)))

        footer = ", ".join(entries)

        # level infos are added to the end and in a new line, like it is done
        # for Categoricals
        if is_categorical_dtype(self.tr_series.dtype):
            if footer:
                footer += "\n"
            footer += self.tr_series._values._repr_categories_info()

        return str(footer)

    def _get_formatted_index(self) -> Tuple[List[str], bool]:
        """
        Return the formatted index labels (first entry is the name row) and
        whether that name row carries an actual name.
        """
        idx = self.tr_series.index

        if isinstance(idx, ABCMultiIndex):
            has_header = any(level_name for level_name in idx.names)
            labels = idx.format(names=True)
        else:
            has_header = idx.name is not None
            labels = idx.format(name=True)
        return labels, has_header

    def _get_formatted_values(self) -> List[str]:
        """
        Format the (possibly truncated) values with the array formatter.
        """
        return format_array(
            self.tr_series._values,
            None,
            float_format=self.float_format,
            na_rep=self.na_rep,
        )

    def to_string(self) -> str:
        """
        Render the series as text, including the truncation marker row,
        optional index column, header and footer.
        """
        footer = self._get_footer()

        if len(self.tr_series) == 0:
            return "{name}([], {footer})".format(
                name=type(self.series).__name__, footer=footer
            )

        fmt_index, have_header = self._get_formatted_index()
        fmt_values = self._get_formatted_values()

        if self.truncate_v:
            n_header_rows = 0
            gap_at = cast(int, self.tr_row_num)
            width = self.adj.len(fmt_values[gap_at - 1])
            marker = "..." if width > 3 else ".."
            # Series uses mode=center because it has single value columns
            # DataFrame uses mode=left
            marker = self.adj.justify([marker], width, mode="center")[0]
            fmt_values.insert(gap_at + n_header_rows, marker)
            # fmt_index still holds the name row at position 0, hence the +1.
            fmt_index.insert(gap_at + 1, "")

        if self.index:
            result = self.adj.adjoin(3, *[fmt_index[1:], fmt_values])
        else:
            result = self.adj.adjoin(3, fmt_values)

        if self.header and have_header:
            result = fmt_index[0] + "\n" + result

        if footer:
            result += "\n" + footer

        return str("".join(result))
class TextAdjustment:
    """
    Width measurement and padding helpers where every character counts as
    one column.
    """

    def __init__(self):
        self.encoding = get_option("display.encoding")

    def len(self, text: str) -> int:
        """Return the display width of ``text`` (its plain length)."""
        return len(text)

    def justify(self, texts: Any, max_len: int, mode: str = "right") -> List[str]:
        """Pad each entry of ``texts`` to ``max_len`` according to ``mode``."""
        return justify(texts, max_len, mode=mode)

    def adjoin(self, space: int, *lists, **kwargs) -> str:
        """
        Join ``lists`` side by side, using this object's ``len`` and
        ``justify`` for width handling.
        """
        return adjoin(
            space,
            *lists,
            strlen=self.len,
            justfunc=self.justify,
            **kwargs,
        )
class EastAsianTextAdjustment(TextAdjustment):
    """
    Text adjustment that measures width using Unicode East Asian Width
    categories, so wide and full-width characters count as two columns.
    """

    def __init__(self):
        super().__init__()
        # Ambiguous width can be changed by option
        self.ambiguous_width = (
            2 if get_option("display.unicode.ambiguous_as_wide") else 1
        )

        # Definition of East Asian Width
        # http://unicode.org/reports/tr11/
        # Categories missing from this map fall back to ambiguous_width.
        self._EAW_MAP = {"Na": 1, "N": 1, "W": 2, "F": 2, "H": 1}

    def len(self, text: str) -> int:
        """
        Calculate display width considering unicode East Asian Width
        """
        if not isinstance(text, str):
            return len(text)

        width_of = self._EAW_MAP.get
        fallback = self.ambiguous_width
        return sum(width_of(east_asian_width(char), fallback) for char in text)

    def justify(
        self, texts: Iterable[str], max_len: int, mode: str = "right"
    ) -> List[str]:
        """
        Pad each string so that its display width becomes ``max_len``.
        """

        # re-calculate padding space per str considering East Asian Width
        def _get_pad(t):
            return max_len - self.len(t) + len(t)

        # Any mode other than "left"/"center" right-justifies.
        method_name = {"left": "ljust", "center": "center"}.get(mode, "rjust")
        return [getattr(x, method_name)(_get_pad(x)) for x in texts]
def _get_adjustment() -> TextAdjustment:
    """
    Return the text adjustment helper selected by the
    ``display.unicode.east_asian_width`` option.
    """
    if get_option("display.unicode.east_asian_width"):
        return EastAsianTextAdjustment()
    return TextAdjustment()
class TableFormatter:
    """
    Base class for table renderers: formatter lookup, output-buffer handling
    and the ``get_result`` entry point. Subclasses implement ``write_result``.
    """

    show_dimensions: Union[bool, str]
    is_truncated: bool
    formatters: formatters_type
    columns: Index

    @property
    def should_show_dimensions(self) -> bool:
        """
        True when dimensions are always shown, or when they are shown on
        truncation and the table is truncated.
        """
        if self.show_dimensions is True:
            return True
        return self.show_dimensions == "truncate" and self.is_truncated

    def _get_formatter(self, i: Union[str, int]) -> Optional[Callable]:
        """
        Look up the formatter for column ``i`` (position or label).

        Returns None when no formatter applies.
        """
        registered = self.formatters
        if isinstance(registered, (list, tuple)):
            # Positional collections can only be addressed by integer.
            if not is_integer(i):
                return None
            return registered[cast(int, i)]

        # Mapping: an integer that is not itself a label is a position.
        if is_integer(i) and i not in self.columns:
            i = self.columns[i]
        return registered.get(i, None)

    @contextmanager
    def get_buffer(
        self, buf: Optional[FilePathOrBuffer[str]], encoding: Optional[str] = None
    ):
        """
        Context manager to open, yield and close buffer for filenames or Path-like
        objects, otherwise yield buf unchanged.
        """
        buf = StringIO() if buf is None else stringify_path(buf)

        if encoding is None:
            encoding = "utf-8"
        elif not isinstance(buf, str):
            raise ValueError("buf is not a file name and encoding is specified.")

        if hasattr(buf, "write"):
            yield buf
            return

        if not isinstance(buf, str):
            raise TypeError("buf is not a file name and it has no write method")

        # GH#30034 open instead of codecs.open prevents a file leak
        # if we have an invalid encoding argument.
        # newline="" is needed to roundtrip correctly on
        # windows test_to_latex_filename
        with open(buf, "w", encoding=encoding, newline="") as handle:
            yield handle

    def write_result(self, buf: IO[str]) -> None:
        """
        Write the result of serialization to buf.
        """
        raise AbstractMethodError(self)

    def get_result(
        self,
        buf: Optional[FilePathOrBuffer[str]] = None,
        encoding: Optional[str] = None,
    ) -> Optional[str]:
        """
        Perform serialization. Write to buf or return as string if buf is None.
        """
        with self.get_buffer(buf, encoding=encoding) as out:
            self.write_result(buf=out)
            if buf is None:
                return out.getvalue()
            return None
class DataFrameFormatter(TableFormatter):
    """
    Render a DataFrame

    self.to_string() : console-friendly tabular output
    self.to_html()   : html table
    self.to_latex()  : LaTeX tabular environment table

    """

    __doc__ = __doc__ if __doc__ else ""
    __doc__ += common_docstring + return_docstring

    def __init__(
        self,
        frame: "DataFrame",
        columns: Optional[Sequence[str]] = None,
        col_space: Optional[Union[str, int]] = None,
        header: Union[bool, Sequence[str]] = True,
        index: bool = True,
        na_rep: str = "NaN",
        formatters: Optional[formatters_type] = None,
        justify: Optional[str] = None,
        float_format: Optional[float_format_type] = None,
        sparsify: Optional[bool] = None,
        index_names: bool = True,
        line_width: Optional[int] = None,
        max_rows: Optional[int] = None,
        min_rows: Optional[int] = None,
        max_cols: Optional[int] = None,
        show_dimensions: Union[bool, str] = False,
        decimal: str = ".",
        table_id: Optional[str] = None,
        render_links: bool = False,
        bold_rows: bool = False,
        escape: bool = True,
    ):
        """
        Store the rendering options, resolve option-backed defaults and
        truncate the frame to the part that will be printed.

        Raises
        ------
        ValueError
            If ``formatters`` is not a dict and its length differs from the
            number of columns of ``frame``.
        """
        self.frame = frame
        self.show_index_names = index_names

        if sparsify is None:
            sparsify = get_option("display.multi_sparse")

        self.sparsify = sparsify

        self.float_format = float_format
        if formatters is None:
            self.formatters = {}
        elif len(frame.columns) == len(formatters) or isinstance(formatters, dict):
            self.formatters = formatters
        else:
            raise ValueError(
                (
                    "Formatters length({flen}) should match "
                    "DataFrame number of columns({dlen})"
                ).format(flen=len(formatters), dlen=len(frame.columns))
            )
        self.na_rep = na_rep
        self.decimal = decimal
        self.col_space = col_space
        self.header = header
        self.index = index
        self.line_width = line_width
        self.max_rows = max_rows
        self.min_rows = min_rows
        self.max_cols = max_cols
        self.max_rows_displayed = min(max_rows or len(self.frame), len(self.frame))
        self.show_dimensions = show_dimensions
        self.table_id = table_id
        self.render_links = render_links

        if justify is None:
            self.justify = get_option("display.colheader_justify")
        else:
            self.justify = justify

        self.bold_rows = bold_rows
        self.escape = escape

        if columns is not None:
            self.columns = ensure_index(columns)
            self.frame = self.frame[self.columns]
        else:
            self.columns = frame.columns

        self._chk_truncate()
        self.adj = _get_adjustment()

    def _chk_truncate(self) -> None:
        """
        Checks whether the frame should be truncated. If so, slices
        the frame up.

        Sets ``tr_frame``, ``tr_row_num``, ``truncate_h``, ``truncate_v`` and
        ``is_truncated``; ``tr_col_num`` is set only when truncating
        horizontally. ``max_rows_adj`` / ``max_cols_adj`` are computed on the
        first call only, so a caller may preset them and call again.
        """
        from pandas.core.reshape.concat import concat

        # Cut the data to the information actually printed
        max_cols = self.max_cols
        max_rows = self.max_rows
        self.max_rows_adj: Optional[int]
        max_rows_adj: Optional[int]

        if max_cols == 0 or max_rows == 0:  # assume we are in the terminal
            (w, h) = get_terminal_size()
            self.w = w
            self.h = h
            if self.max_rows == 0:
                dot_row = 1
                prompt_row = 1
                # The dimensions footer only occupies rows when it is shown.
                # Always bind the name so the sum below cannot fail when
                # show_dimensions is falsy.
                show_dimension_rows = 3 if self.show_dimensions else 0
                # assume we only get here if self.header is boolean.
                # i.e. not to_latex() where self.header may be List[str]
                self.header = cast(bool, self.header)
                n_add_rows = self.header + dot_row + show_dimension_rows + prompt_row
                # rows available to fill with actual data
                max_rows_adj = self.h - n_add_rows
                self.max_rows_adj = max_rows_adj

            # Format only rows and columns that could potentially fit the
            # screen
            if max_cols == 0 and len(self.frame.columns) > w:
                max_cols = w
            if max_rows == 0 and len(self.frame) > h:
                max_rows = h

        if not hasattr(self, "max_rows_adj"):
            if max_rows:
                if (len(self.frame) > max_rows) and self.min_rows:
                    # if truncated, set max_rows showed to min_rows
                    max_rows = min(self.min_rows, max_rows)
            self.max_rows_adj = max_rows
        if not hasattr(self, "max_cols_adj"):
            self.max_cols_adj = max_cols

        max_cols_adj = self.max_cols_adj
        max_rows_adj = self.max_rows_adj

        truncate_h = max_cols_adj and (len(self.columns) > max_cols_adj)
        truncate_v = max_rows_adj and (len(self.frame) > max_rows_adj)

        frame = self.frame
        if truncate_h:
            # cast here since if truncate_h is True, max_cols_adj is not None
            max_cols_adj = cast(int, max_cols_adj)
            if max_cols_adj == 0:
                col_num = len(frame.columns)
            elif max_cols_adj == 1:
                max_cols = cast(int, max_cols)
                frame = frame.iloc[:, :max_cols]
                col_num = max_cols
            else:
                col_num = max_cols_adj // 2
                frame = concat(
                    (frame.iloc[:, :col_num], frame.iloc[:, -col_num:]), axis=1
                )
                # truncate formatter
                if isinstance(self.formatters, (list, tuple)):
                    truncate_fmt = self.formatters
                    self.formatters = [
                        *truncate_fmt[:col_num],
                        *truncate_fmt[-col_num:],
                    ]
            self.tr_col_num = col_num
        if truncate_v:
            # cast here since if truncate_v is True, max_rows_adj is not None
            max_rows_adj = cast(int, max_rows_adj)
            if max_rows_adj == 1:
                row_num = max_rows
                frame = frame.iloc[:max_rows, :]
            else:
                row_num = max_rows_adj // 2
                frame = concat((frame.iloc[:row_num, :], frame.iloc[-row_num:, :]))
            self.tr_row_num = row_num
        else:
            self.tr_row_num = None

        self.tr_frame = frame
        self.truncate_h = truncate_h
        self.truncate_v = truncate_v
        self.is_truncated = bool(self.truncate_h or self.truncate_v)

    def _to_str_columns(self) -> List[List[str]]:
        """
        Render a DataFrame to a list of columns (as lists of strings).

        Raises
        ------
        ValueError
            If ``header`` is list-like and its length differs from the number
            of columns.
        """
        # this method is not used by to_html where self.col_space
        # could be a string so safe to cast
        self.col_space = cast(int, self.col_space)

        frame = self.tr_frame
        # may include levels names also

        str_index = self._get_formatted_index(frame)

        if not is_list_like(self.header) and not self.header:
            stringified = []
            for i, c in enumerate(frame):
                fmt_values = self._format_col(i)
                fmt_values = _make_fixed_width(
                    fmt_values,
                    self.justify,
                    minimum=(self.col_space or 0),
                    adj=self.adj,
                )
                stringified.append(fmt_values)
        else:
            if is_list_like(self.header):
                # cast here since can't be bool if is_list_like
                self.header = cast(List[str], self.header)
                if len(self.header) != len(self.columns):
                    raise ValueError(
                        (
                            "Writing {ncols} cols but got {nalias} "
                            "aliases".format(
                                ncols=len(self.columns), nalias=len(self.header)
                            )
                        )
                    )
                str_columns = [[label] for label in self.header]
            else:
                str_columns = self._get_formatted_column_labels(frame)

            if self.show_row_idx_names:
                for x in str_columns:
                    x.append("")

            stringified = []
            for i, c in enumerate(frame):
                cheader = str_columns[i]
                header_colwidth = max(
                    self.col_space or 0, *(self.adj.len(x) for x in cheader)
                )
                fmt_values = self._format_col(i)
                fmt_values = _make_fixed_width(
                    fmt_values, self.justify, minimum=header_colwidth, adj=self.adj
                )

                max_len = max(max(self.adj.len(x) for x in fmt_values), header_colwidth)
                cheader = self.adj.justify(cheader, max_len, mode=self.justify)
                stringified.append(cheader + fmt_values)

        strcols = stringified
        if self.index:
            strcols.insert(0, str_index)

        # Add ... to signal truncated
        truncate_h = self.truncate_h
        truncate_v = self.truncate_v

        dot_col_ix = None
        if truncate_h:
            # The index column, when present, shifts every data column one
            # slot to the right; without it the marker sits at tr_col_num.
            dot_col_ix = self.tr_col_num + 1 if self.index else self.tr_col_num
            strcols.insert(dot_col_ix, [" ..."] * (len(str_index)))
        if truncate_v:
            n_header_rows = len(str_index) - len(frame)
            row_num = self.tr_row_num
            # cast here since if truncate_v is True, self.tr_row_num is not None
            row_num = cast(int, row_num)
            for ix, col in enumerate(strcols):
                # infer from above row
                cwidth = self.adj.len(strcols[ix][row_num])
                is_dot_col = False
                if truncate_h:
                    is_dot_col = ix == dot_col_ix
                if cwidth > 3 or is_dot_col:
                    my_str = "..."
                else:
                    my_str = ".."

                # Only a real index column is left-aligned; with index=False
                # column 0 is ordinary data.
                if ix == 0 and self.index:
                    dot_mode = "left"
                elif is_dot_col:
                    cwidth = 4
                    dot_mode = "right"
                else:
                    dot_mode = "right"
                dot_str = self.adj.justify([my_str], cwidth, mode=dot_mode)[0]
                strcols[ix].insert(row_num + n_header_rows, dot_str)
        return strcols

    def write_result(self, buf: IO[str]) -> None:
        """
        Render a DataFrame to a console-friendly tabular output.
        """
        from pandas import Series

        frame = self.frame

        if len(frame.columns) == 0 or len(frame.index) == 0:
            info_line = "Empty {name}\nColumns: {col}\nIndex: {idx}".format(
                name=type(self.frame).__name__,
                col=pprint_thing(frame.columns),
                idx=pprint_thing(frame.index),
            )
            text = info_line
        else:

            strcols = self._to_str_columns()
            if self.line_width is None:  # no need to wrap around just print
                # the whole frame
                text = self.adj.adjoin(1, *strcols)
            elif (
                not isinstance(self.max_cols, int) or self.max_cols > 0
            ):  # need to wrap around
                text = self._join_multiline(*strcols)
            else:  # max_cols == 0. Try to fit frame to terminal
                lines = self.adj.adjoin(1, *strcols).split("\n")
                max_len = Series(lines).str.len().max()
                # plus truncate dot col
                dif = max_len - self.w
                # '+ 1' to avoid too wide repr (GH PR #17023)
                adj_dif = dif + 1
                col_lens = Series([Series(ele).apply(len).max() for ele in strcols])
                n_cols = len(col_lens)
                counter = 0
                # Drop middle columns until the remaining ones fit the width.
                while adj_dif > 0 and n_cols > 1:
                    counter += 1
                    mid = int(round(n_cols / 2.0))
                    mid_ix = col_lens.index[mid]
                    col_len = col_lens[mid_ix]
                    # adjoin adds one
                    adj_dif -= col_len + 1
                    col_lens = col_lens.drop(mid_ix)
                    n_cols = len(col_lens)
                # subtract index column
                max_cols_adj = n_cols - self.index
                # GH-21180. Ensure that we print at least two.
                max_cols_adj = max(max_cols_adj, 2)
                self.max_cols_adj = max_cols_adj

                # Call again _chk_truncate to cut frame appropriately
                # and then generate string representation
                self._chk_truncate()
                strcols = self._to_str_columns()
                text = self.adj.adjoin(1, *strcols)
        # text is one string; write() emits it in a single call, whereas
        # writelines() would iterate over it character by character.
        buf.write(text)

        if self.should_show_dimensions:
            buf.write(
                "\n\n[{nrows} rows x {ncols} columns]".format(
                    nrows=len(frame), ncols=len(frame.columns)
                )
            )

    def _join_multiline(self, *args) -> str:
        """
        Join the string columns into several stacked chunks so that each
        chunk fits within ``line_width``; chunks are separated by a blank line.
        """
        lwidth = self.line_width
        adjoin_width = 1
        strcols = list(args)
        if self.index:
            idx = strcols.pop(0)
            lwidth -= np.array([self.adj.len(x) for x in idx]).max() + adjoin_width

        col_widths = [
            np.array([self.adj.len(x) for x in col]).max() if len(col) > 0 else 0
            for col in strcols
        ]

        assert lwidth is not None
        col_bins = _binify(col_widths, lwidth)
        nbins = len(col_bins)

        if self.truncate_v:
            # cast here since if truncate_v is True, max_rows_adj is not None
            self.max_rows_adj = cast(int, self.max_rows_adj)
            nrows = self.max_rows_adj + 1
        else:
            nrows = len(self.frame)

        str_lst = []
        st = 0
        for i, ed in enumerate(col_bins):
            row = strcols[st:ed]
            if self.index:
                row.insert(0, idx)
            if nbins > 1:
                # Every chunk but the last ends with a continuation backslash.
                if ed <= len(strcols) and i < nbins - 1:
                    row.append([" \\"] + [" "] * (nrows - 1))
                else:
                    row.append([" "] * nrows)
            str_lst.append(self.adj.adjoin(adjoin_width, *row))
            st = ed
        return "\n\n".join(str_lst)

    def to_string(
        self,
        buf: Optional[FilePathOrBuffer[str]] = None,
        encoding: Optional[str] = None,
    ) -> Optional[str]:
        """
        Render the frame as console text; see ``get_result`` for the meaning
        of ``buf`` and the return value.
        """
        return self.get_result(buf=buf, encoding=encoding)

    def to_latex(
        self,
        buf: Optional[FilePathOrBuffer[str]] = None,
        column_format: Optional[str] = None,
        longtable: bool = False,
        encoding: Optional[str] = None,
        multicolumn: bool = False,
        multicolumn_format: Optional[str] = None,
        multirow: bool = False,
        caption: Optional[str] = None,
        label: Optional[str] = None,
    ) -> Optional[str]:
        """
        Render a DataFrame to a LaTeX tabular/longtable environment output.
        """

        from pandas.io.formats.latex import LatexFormatter

        return LatexFormatter(
            self,
            column_format=column_format,
            longtable=longtable,
            multicolumn=multicolumn,
            multicolumn_format=multicolumn_format,
            multirow=multirow,
            caption=caption,
            label=label,
        ).get_result(buf=buf, encoding=encoding)

    def _format_col(self, i: int) -> List[str]:
        """
        Format the values of the ``i``-th column of the truncated frame.
        """
        frame = self.tr_frame
        formatter = self._get_formatter(i)
        return format_array(
            frame.iloc[:, i]._values,
            formatter,
            float_format=self.float_format,
            na_rep=self.na_rep,
            space=self.col_space,
            decimal=self.decimal,
        )

    def to_html(
        self,
        buf: Optional[FilePathOrBuffer[str]] = None,
        encoding: Optional[str] = None,
        classes: Optional[Union[str, List, Tuple]] = None,
        notebook: bool = False,
        border: Optional[int] = None,
    ) -> Optional[str]:
        """
        Render a DataFrame to a html table.

        Parameters
        ----------
        classes : str or list-like
            classes to include in the `class` attribute of the opening
            ``<table>`` tag, in addition to the default "dataframe".
        notebook : {True, False}, optional, default False
            Whether the generated HTML is for IPython Notebook.
        border : int
            A ``border=border`` attribute is included in the opening
            ``<table>`` tag. Default ``pd.options.display.html.border``.
        """
        from pandas.io.formats.html import HTMLFormatter, NotebookFormatter

        Klass = NotebookFormatter if notebook else HTMLFormatter
        return Klass(self, classes=classes, border=border).get_result(
            buf=buf, encoding=encoding
        )

    def _get_formatted_column_labels(self, frame: "DataFrame") -> List[List[str]]:
        """
        Return, for each column of ``frame``, the list of header strings (one
        per column level). Labels get a leading space when the column is
        numeric and has no custom formatter.
        """
        from pandas.core.indexes.multi import _sparsify

        columns = frame.columns

        if isinstance(columns, ABCMultiIndex):
            fmt_columns = columns.format(sparsify=False, adjoin=False)
            fmt_columns = list(zip(*fmt_columns))
            dtypes = self.frame.dtypes._values

            # if we have a Float level, they don't use leading space at all
            restrict_formatting = any(level.is_floating for level in columns.levels)
            need_leadsp = dict(zip(fmt_columns, map(is_numeric_dtype, dtypes)))

            def space_format(x, y):
                if (
                    y not in self.formatters
                    and need_leadsp[x]
                    and not restrict_formatting
                ):
                    return " " + y
                return y

            str_columns = list(
                zip(*[[space_format(x, y) for y in x] for x in fmt_columns])
            )
            if self.sparsify and len(str_columns):
                str_columns = _sparsify(str_columns)

            str_columns = [list(x) for x in zip(*str_columns)]
        else:
            fmt_columns = columns.format()
            dtypes = self.frame.dtypes
            need_leadsp = dict(zip(fmt_columns, map(is_numeric_dtype, dtypes)))
            str_columns = [
                [" " + x if not self._get_formatter(i) and need_leadsp[x] else x]
                for i, (col, x) in enumerate(zip(columns, fmt_columns))
            ]
        # self.str_columns = str_columns
        return str_columns

    @property
    def has_index_names(self) -> bool:
        """Whether the frame's index has names, per ``_has_names``."""
        return _has_names(self.frame.index)

    @property
    def has_column_names(self) -> bool:
        """Whether the frame's columns have names, per ``_has_names``."""
        return _has_names(self.frame.columns)

    @property
    def show_row_idx_names(self) -> bool:
        """True when index names exist, the index is shown and names are enabled."""
        return all((self.has_index_names, self.index, self.show_index_names))

    @property
    def show_col_idx_names(self) -> bool:
        """True when column names exist, names are enabled and a header is shown."""
        return all((self.has_column_names, self.show_index_names, self.header))

    def _get_formatted_index(self, frame: "DataFrame") -> List[str]:
        """
        Return the rendered index column of ``frame`` as one string per output
        row, preceded by the column-header rows when ``header`` is truthy.
        """
        # Note: this is only used by to_string() and to_latex(), not by
        # to_html(). so safe to cast col_space here.
        self.col_space = cast(int, self.col_space)
        index = frame.index
        columns = frame.columns
        fmt = self._get_formatter("__index__")

        if isinstance(index, ABCMultiIndex):
            fmt_index = index.format(
                sparsify=self.sparsify,
                adjoin=False,
                names=self.show_row_idx_names,
                formatter=fmt,
            )
        else:
            fmt_index = [index.format(name=self.show_row_idx_names, formatter=fmt)]

        fmt_index = [
            tuple(
                _make_fixed_width(
                    list(x), justify="left", minimum=(self.col_space or 0), adj=self.adj
                )
            )
            for x in fmt_index
        ]

        adjoined = self.adj.adjoin(1, *fmt_index).split("\n")

        # empty space for columns
        if self.show_col_idx_names:
            col_header = ["{x}".format(x=x) for x in self._get_column_name_list()]
        else:
            col_header = [""] * columns.nlevels

        if self.header:
            return col_header + adjoined
        else:
            return adjoined

    def _get_column_name_list(self) -> List[str]:
        """
        Return the name of each column level, with None replaced by "".
        """
        names: List[str] = []
        columns = self.frame.columns
        if isinstance(columns, ABCMultiIndex):
            names.extend("" if name is None else name for name in columns.names)
        else:
            names.append("" if columns.name is None else columns.name)
        return names
1093# ----------------------------------------------------------------------
1094# Array formatters
def format_array(
    values: Any,
    formatter: Optional[Callable],
    float_format: Optional[float_format_type] = None,
    na_rep: str = "NaN",
    digits: Optional[int] = None,
    space: Optional[Union[str, int]] = None,
    justify: str = "right",
    decimal: str = ".",
    leading_space: Optional[bool] = None,
) -> List[str]:
    """
    Format an array for printing.

    The formatter class is chosen from the dtype of ``values``; ``space``,
    ``float_format`` and ``digits`` fall back to the corresponding display
    options when None.

    Parameters
    ----------
    values
    formatter
    float_format
    na_rep
    digits
    space
    justify
    decimal
    leading_space : bool, optional
        Whether the array should be formatted with a leading space.
        When an array as a column of a Series or DataFrame, we do want
        the leading space to pad between columns.

        When formatting an Index subclass
        (e.g. IntervalIndex._format_native_types), we don't want the
        leading space since it should be left-aligned.

    Returns
    -------
    List[str]
    """
    # Ordered (predicate, formatter class) pairs; the first match wins, so
    # the order below is significant.
    dispatch = (
        (lambda arr: is_datetime64_dtype(arr.dtype), Datetime64Formatter),
        (is_datetime64tz_dtype, Datetime64TZFormatter),
        (lambda arr: is_timedelta64_dtype(arr.dtype), Timedelta64Formatter),
        (lambda arr: is_extension_array_dtype(arr.dtype), ExtensionArrayFormatter),
        (
            lambda arr: is_float_dtype(arr.dtype) or is_complex_dtype(arr.dtype),
            FloatArrayFormatter,
        ),
        (lambda arr: is_integer_dtype(arr.dtype), IntArrayFormatter),
    )

    fmt_klass: Type[GenericArrayFormatter] = GenericArrayFormatter
    for matches, klass in dispatch:
        if matches(values):
            fmt_klass = klass
            break

    if space is None:
        space = get_option("display.column_space")

    if float_format is None:
        float_format = get_option("display.float_format")

    if digits is None:
        digits = get_option("display.precision")

    options = dict(
        digits=digits,
        na_rep=na_rep,
        float_format=float_format,
        formatter=formatter,
        space=space,
        justify=justify,
        decimal=decimal,
        leading_space=leading_space,
    )
    return fmt_klass(values, **options).get_result()
class GenericArrayFormatter:
    """
    Base formatter turning the elements of an array into display strings.

    Subclasses override ``_format_strings``; ``get_result`` then pads the
    produced strings to a common width.
    """

    def __init__(
        self,
        values: Any,
        digits: int = 7,
        formatter: Optional[Callable] = None,
        na_rep: str = "NaN",
        space: Union[str, int] = 12,
        float_format: Optional[float_format_type] = None,
        justify: str = "right",
        decimal: str = ".",
        quoting: Optional[int] = None,
        fixed_width: bool = True,
        leading_space: Optional[bool] = None,
    ):
        # All settings are stored verbatim; no validation happens here.
        self.values = values
        self.digits = digits
        self.na_rep = na_rep
        self.space = space
        self.formatter = formatter
        self.float_format = float_format
        self.justify = justify
        self.decimal = decimal
        self.quoting = quoting
        self.fixed_width = fixed_width
        self.leading_space = leading_space

    def get_result(self) -> List[str]:
        """
        Return the formatted values, justified according to ``self.justify``.
        """
        fmt_values = self._format_strings()
        return _make_fixed_width(fmt_values, self.justify)

    def _format_strings(self) -> List[str]:
        """
        Convert every element of ``self.values`` to a string.

        Non-missing float elements go through the float format; everything
        else goes through ``_format`` below, optionally with a leading space.
        """
        # Resolve the float format: explicit setting, then the display
        # option, then a "%g"-style default using display.precision.
        if self.float_format is None:
            float_format = get_option("display.float_format")
            if float_format is None:
                fmt_str = "{{x: .{prec:d}g}}".format(
                    prec=get_option("display.precision")
                )
                float_format = lambda x: fmt_str.format(x=x)
        else:
            float_format = self.float_format

        # Default element formatter pretty-prints and escapes tab, carriage
        # return and newline characters.
        formatter = (
            self.formatter
            if self.formatter is not None
            else (lambda x: pprint_thing(x, escape_chars=("\t", "\r", "\n")))
        )

        def _format(x):
            # Missing scalars: keep None / NA / NaT recognisable, otherwise
            # fall back to the configured na_rep.
            if self.na_rep is not None and is_scalar(x) and isna(x):
                try:
                    # try block for np.isnat specifically
                    # determine na_rep if x is None or NaT-like
                    if x is None:
                        return "None"
                    elif x is NA:
                        return str(NA)
                    elif x is NaT or np.isnat(x):
                        return "NaT"
                except (TypeError, ValueError):
                    # np.isnat only handles datetime or timedelta objects
                    pass
                return self.na_rep
            elif isinstance(x, PandasObject):
                return "{x}".format(x=x)
            else:
                # object dtype
                return "{x}".format(x=formatter(x))

        # Unwrap Index / sparse containers to their underlying values.
        vals = self.values
        if isinstance(vals, Index):
            vals = vals._values
        elif isinstance(vals, ABCSparseArray):
            vals = vals.values

        # Elementwise mask of values that are floats and not missing.
        is_float_type = lib.map_infer(vals, is_float) & notna(vals)
        leading_space = self.leading_space
        if leading_space is None:
            # Unspecified: add the leading space only if any float is present.
            leading_space = is_float_type.any()

        fmt_values = []
        for i, v in enumerate(vals):
            if not is_float_type[i] and leading_space:
                fmt_values.append(" {v}".format(v=_format(v)))
            elif is_float_type[i]:
                fmt_values.append(float_format(v))
            else:
                # Identity check against the ``False`` singleton: a falsy
                # value that is not literally ``False`` still gets a space.
                # NOTE(review): presumably this covers the NumPy bool
                # produced by ``.any()`` above - confirm.
                if leading_space is False:
                    # False specifically, so that the default is
                    # to include a space if we get here.
                    tpl = "{v}"
                else:
                    tpl = " {v}"
                fmt_values.append(tpl.format(v=_format(v)))

        return fmt_values
class FloatArrayFormatter(GenericArrayFormatter):
    """
    Formats floating-point (and complex) values as strings.

    With ``fixed_width`` enabled, a fixed-point rendering with
    ``self.digits`` decimals is tried first and replaced by scientific
    notation when values would be too small or too wide.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        # float_format is expected to be a string
        # formatter should be used to pass a function
        if self.float_format is not None and self.formatter is None:
            # GH21625, GH22270
            # An explicit float_format disables the fixed-width handling.
            self.fixed_width = False
            if callable(self.float_format):
                # A callable float_format is used as the element formatter.
                self.formatter = self.float_format
                self.float_format = None

    def _value_formatter(
        self,
        float_format: Optional[float_format_type] = None,
        threshold: Optional[Union[float, int]] = None,
    ) -> Callable:
        """Returns a function to be applied on each value to format it

        Parameters
        ----------
        float_format : callable, optional
            Called as ``float_format(value=v)``; falls back to
            ``self.float_format`` and then to ``str``.
        threshold : float or int, optional
            When given, values whose absolute value does not exceed it are
            formatted as ``0.0``.
        """

        # the float_format parameter supersedes self.float_format
        if float_format is None:
            float_format = self.float_format

        # we are going to compose different functions, to first convert to
        # a string, then replace the decimal symbol, and finally chop according
        # to the threshold

        # when there is no float_format, we use str instead of '%g'
        # because str(0.0) = '0.0' while '%g' % 0.0 = '0'
        if float_format:

            def base_formatter(v):
                return float_format(value=v) if notna(v) else self.na_rep

        else:

            def base_formatter(v):
                return str(v) if notna(v) else self.na_rep

        if self.decimal != ".":

            def decimal_formatter(v):
                # Only the first "." is replaced by the custom separator.
                return base_formatter(v).replace(".", self.decimal, 1)

        else:
            decimal_formatter = base_formatter

        if threshold is None:
            return decimal_formatter

        def formatter(value):
            if notna(value):
                if abs(value) > threshold:
                    return decimal_formatter(value)
                else:
                    # At or below the chop threshold: display as zero.
                    return decimal_formatter(0.0)
            else:
                return self.na_rep

        return formatter

    def get_result_as_array(self) -> np.ndarray:
        """
        Returns the float values converted into strings using
        the parameters given at initialisation, as a numpy array
        """

        # A user-supplied formatter bypasses all of the logic below.
        if self.formatter is not None:
            return np.array([self.formatter(x) for x in self.values])

        if self.fixed_width:
            threshold = get_option("display.chop_threshold")
        else:
            threshold = None

        # if we have a fixed_width, we'll need to try different float_format
        def format_values_with(float_format):
            formatter = self._value_formatter(float_format, threshold)

            # default formatter leaves a space to the left when formatting
            # floats, must be consistent for left-justifying NaNs (GH #25061)
            if self.justify == "left":
                na_rep = " " + self.na_rep
            else:
                na_rep = self.na_rep

            # separate the wheat from the chaff
            values = self.values
            is_complex = is_complex_dtype(values)
            mask = isna(values)
            if hasattr(values, "to_dense"):  # sparse numpy ndarray
                values = values.to_dense()
            # Work on an object-dtype copy so strings can be stored in place.
            values = np.array(values, dtype="object")
            values[mask] = na_rep
            imask = (~mask).ravel()
            values.flat[imask] = np.array(
                [formatter(val) for val in values.ravel()[imask]]
            )

            if self.fixed_width:
                # Strip trailing zeros shared by all formatted numbers.
                if is_complex:
                    result = _trim_zeros_complex(values, na_rep)
                else:
                    result = _trim_zeros_float(values, na_rep)
                return np.asarray(result, dtype="object")

            return values

        # There is a special default string when we are fixed-width
        # The default is otherwise to use str instead of a formatting string
        float_format: Optional[float_format_type]
        if self.float_format is None:
            if self.fixed_width:
                float_format = partial(
                    "{value: .{digits:d}f}".format, digits=self.digits
                )
            else:
                float_format = self.float_format
        else:
            # A string float_format is applied with the "%" operator.
            float_format = lambda value: self.float_format % value

        formatted_values = format_values_with(float_format)

        if not self.fixed_width:
            return formatted_values

        # we need to convert to engineering format if some values are too small
        # and would appear as 0, or if some values are too big and take too
        # much space

        if len(formatted_values) > 0:
            maxlen = max(len(x) for x in formatted_values)
            too_long = maxlen > self.digits + 6
        else:
            too_long = False

        # Comparisons involving NaN would otherwise emit "invalid" warnings.
        with np.errstate(invalid="ignore"):
            abs_vals = np.abs(self.values)
            # this is pretty arbitrary for now
            # large values: more that 8 characters including decimal symbol
            # and first digit, hence > 1e6
            has_large_values = (abs_vals > 1e6).any()
            has_small_values = (
                (abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)
            ).any()

        if has_small_values or (too_long and has_large_values):
            # Second pass: re-render everything in exponent notation.
            float_format = partial("{value: .{digits:d}e}".format, digits=self.digits)
            formatted_values = format_values_with(float_format)

        return formatted_values

    def _format_strings(self) -> List[str]:
        """Return the formatted values as a plain list of strings."""
        # shortcut
        if self.formatter is not None:
            return [self.formatter(x) for x in self.values]

        return list(self.get_result_as_array())
class IntArrayFormatter(GenericArrayFormatter):
    """Formatter for integer values."""

    def _format_strings(self) -> List[str]:
        """
        Format each value with ``self.formatter`` when one is set, otherwise
        with the ``"{x: d}"`` format (space in place of a plus sign).
        """

        def _default(x):
            return "{x: d}".format(x=x)

        render = self.formatter if self.formatter else _default
        return [render(value) for value in self.values]
class Datetime64Formatter(GenericArrayFormatter):
    """Formatter for timezone-naive datetime64 values."""

    def __init__(
        self,
        values: Union[np.ndarray, "Series", DatetimeIndex, DatetimeArray],
        nat_rep: str = "NaT",
        date_format: None = None,
        **kwargs,
    ):
        super().__init__(values, **kwargs)
        self.date_format = date_format
        self.nat_rep = nat_rep

    def _format_strings(self) -> List[str]:
        """
        Format the values, which by definition carry no timezone.

        A callable ``self.formatter`` takes precedence; otherwise the values
        are rendered with ``self.date_format`` (or a date-only default when
        applicable) and ``self.nat_rep`` for missing entries.
        """
        dt_values = self.values
        if not isinstance(dt_values, DatetimeIndex):
            dt_values = DatetimeIndex(dt_values)

        if self.formatter is not None and callable(self.formatter):
            return [self.formatter(item) for item in dt_values]

        flat_i8 = dt_values.asi8.ravel()
        date_fmt = _get_format_datetime64_from_values(dt_values, self.date_format)
        rendered = format_array_from_datetime(
            flat_i8, format=date_fmt, na_rep=self.nat_rep
        )
        return rendered.reshape(dt_values.shape).tolist()
class ExtensionArrayFormatter(GenericArrayFormatter):
    """Formatter for extension arrays; delegates to ``format_array``."""

    def _format_strings(self) -> List[str]:
        """
        Convert the extension array to a NumPy array and format that.

        The array's own ``_formatter(boxed=True)`` is used as the element
        formatter, and this formatter's display settings (including the
        decimal separator) are forwarded to ``format_array``.
        """
        values = self.values
        if isinstance(values, (ABCIndexClass, ABCSeries)):
            values = values._values

        formatter = values._formatter(boxed=True)

        if is_categorical_dtype(values.dtype):
            # Categorical is special for now, so that we can preserve tzinfo
            array = values._internal_get_values()
        else:
            array = np.asarray(values)

        fmt_values = format_array(
            array,
            formatter,
            float_format=self.float_format,
            na_rep=self.na_rep,
            digits=self.digits,
            space=self.space,
            justify=self.justify,
            # Forward the decimal separator as well; without it the delegate
            # formatter silently falls back to ".".
            decimal=self.decimal,
            leading_space=self.leading_space,
        )
        return fmt_values
def format_percentiles(
    percentiles: Union[
        np.ndarray, List[Union[int, float]], List[float], List[Union[str, float]]
    ]
) -> List[str]:
    """
    Outputs rounded and formatted percentiles.

    Parameters
    ----------
    percentiles : list-like, containing floats from interval [0,1]

    Returns
    -------
    formatted : list of strings

    Raises
    ------
    ValueError
        If ``percentiles`` is not numeric or any entry lies outside [0, 1]
        (NaN included).

    Notes
    -----
    Rounding precision is chosen so that: (1) if any two elements of
    ``percentiles`` differ, they remain different after rounding
    (2) no entry is *rounded* to 0% or 100%.
    Any non-integer is always rounded to at least 1 decimal place.

    Examples
    --------
    Keeps all entries different after rounding:

    >>> format_percentiles([0.01999, 0.02001, 0.5, 0.666666, 0.9999])
    ['1.999%', '2.001%', '50%', '66.667%', '99.99%']

    No element is rounded to 0% or 100% (unless already equal to it).
    Duplicates are allowed:

    >>> format_percentiles([0, 0.5, 0.02001, 0.5, 0.666666, 0.9999])
    ['0%', '50%', '2.0%', '50%', '66.67%', '99.99%']
    """

    percentiles = np.asarray(percentiles)

    # It checks for np.NaN as well
    with np.errstate(invalid="ignore"):
        if (
            not is_numeric_dtype(percentiles)
            or not np.all(percentiles >= 0)
            or not np.all(percentiles <= 1)
        ):
            raise ValueError("percentiles should all be in the interval [0,1]")

    percentiles = 100 * percentiles

    # Compare against the *nearest* integer rather than the truncated one:
    # e.g. 0.57 * 100 evaluates to 56.99999999999999, which truncates to 56
    # and would otherwise be rendered as "57.0%" instead of "57%".
    nearest_int = percentiles.round().astype(int)
    int_idx = np.isclose(nearest_int, percentiles)

    if np.all(int_idx):
        out = nearest_int.astype(str)
        return [i + "%" for i in out]

    unique_pcts = np.unique(percentiles)
    # Distances to 0 and 100 take part in the precision choice so that no
    # entry is rounded onto either bound.
    to_begin = unique_pcts[0] if unique_pcts[0] > 0 else None
    to_end = 100 - unique_pcts[-1] if unique_pcts[-1] < 100 else None

    # Least precision that keeps percentiles unique after rounding
    prec = -np.floor(
        np.log10(np.min(np.ediff1d(unique_pcts, to_begin=to_begin, to_end=to_end)))
    ).astype(int)
    prec = max(1, prec)
    out = np.empty_like(percentiles, dtype=object)
    out[int_idx] = nearest_int[int_idx].astype(str)
    out[~int_idx] = percentiles[~int_idx].round(prec).astype(str)
    return [i + "%" for i in out]
1574def _is_dates_only(
1575 values: Union[np.ndarray, DatetimeArray, Index, DatetimeIndex]
1576) -> bool:
1577 # return a boolean if we are only dates (and don't have a timezone)
1578 assert values.ndim == 1
1580 values = DatetimeIndex(values)
1581 if values.tz is not None:
1582 return False
1584 values_int = values.asi8
1585 consider_values = values_int != iNaT
1586 one_day_nanos = 86400 * 1e9
1587 even_days = (
1588 np.logical_and(consider_values, values_int % int(one_day_nanos) != 0).sum() == 0
1589 )
1590 if even_days:
1591 return True
1592 return False
1595def _format_datetime64(
1596 x: Union[NaTType, Timestamp], tz: Optional[tzinfo] = None, nat_rep: str = "NaT"
1597) -> str:
1598 if x is None or (is_scalar(x) and isna(x)):
1599 return nat_rep
1601 if tz is not None or not isinstance(x, Timestamp):
1602 if getattr(x, "tzinfo", None) is not None:
1603 x = Timestamp(x).tz_convert(tz)
1604 else:
1605 x = Timestamp(x).tz_localize(tz)
1607 return str(x)
1610def _format_datetime64_dateonly(
1611 x: Union[NaTType, Timestamp], nat_rep: str = "NaT", date_format: None = None
1612) -> str:
1613 if x is None or (is_scalar(x) and isna(x)):
1614 return nat_rep
1616 if not isinstance(x, Timestamp):
1617 x = Timestamp(x)
1619 if date_format:
1620 return x.strftime(date_format)
1621 else:
1622 return x._date_repr
1625def _get_format_datetime64(
1626 is_dates_only: bool, nat_rep: str = "NaT", date_format: None = None
1627) -> Callable:
1629 if is_dates_only:
1630 return lambda x, tz=None: _format_datetime64_dateonly(
1631 x, nat_rep=nat_rep, date_format=date_format
1632 )
1633 else:
1634 return lambda x, tz=None: _format_datetime64(x, tz=tz, nat_rep=nat_rep)
def _get_format_datetime64_from_values(
    values: Union[np.ndarray, DatetimeArray, DatetimeIndex], date_format: Optional[str]
) -> Optional[str]:
    """
    Given values and a date_format, return the format string to use.

    For dates-only values a missing ``date_format`` defaults to
    ``"%Y-%m-%d"``; otherwise ``date_format`` is returned unchanged.
    """
    # Element order is irrelevant here, and the dates-only check needs 1D
    # input, so multi-dimensional ndarrays are flattened first.
    needs_flatten = isinstance(values, np.ndarray) and values.ndim > 1
    flat_values = values.ravel() if needs_flatten else values

    if not _is_dates_only(flat_values):
        return date_format
    return date_format or "%Y-%m-%d"
class Datetime64TZFormatter(Datetime64Formatter):
    """Formatter for timezone-aware datetime values."""

    def _format_strings(self) -> List[str]:
        """
        Format the values, which by definition carry a timezone.

        Values are converted to objects and each is rendered with
        ``self.formatter`` if set, else with a default datetime formatter.
        """
        boxed = self.values.astype(object)
        dates_only = _is_dates_only(boxed)

        render = self.formatter
        if not render:
            render = _get_format_datetime64(dates_only, date_format=self.date_format)

        return [render(item) for item in boxed]
class Timedelta64Formatter(GenericArrayFormatter):
    """Formatter for timedelta64 values."""

    def __init__(
        self,
        values: Union[np.ndarray, TimedeltaIndex],
        nat_rep: str = "NaT",
        box: bool = False,
        **kwargs,
    ):
        super().__init__(values, **kwargs)
        self.box = box
        self.nat_rep = nat_rep

    def _format_strings(self) -> List[str]:
        """
        Format each value with ``self.formatter`` if set, otherwise with a
        formatter derived from the values via ``_get_format_timedelta64``.
        """
        render = self.formatter
        if not render:
            render = _get_format_timedelta64(
                self.values, nat_rep=self.nat_rep, box=self.box
            )
        return [render(item) for item in self.values]
1686def _get_format_timedelta64(
1687 values: Union[np.ndarray, TimedeltaIndex, TimedeltaArray],
1688 nat_rep: str = "NaT",
1689 box: bool = False,
1690) -> Callable:
1691 """
1692 Return a formatter function for a range of timedeltas.
1693 These will all have the same format argument
1695 If box, then show the return in quotes
1696 """
1698 values_int = values.astype(np.int64)
1700 consider_values = values_int != iNaT
1702 one_day_nanos = 86400 * 1e9
1703 even_days = (
1704 np.logical_and(consider_values, values_int % one_day_nanos != 0).sum() == 0
1705 )
1706 all_sub_day = (
1707 np.logical_and(consider_values, np.abs(values_int) >= one_day_nanos).sum() == 0
1708 )
1710 if even_days:
1711 format = None
1712 elif all_sub_day:
1713 format = "sub_day"
1714 else:
1715 format = "long"
1717 def _formatter(x):
1718 if x is None or (is_scalar(x) and isna(x)):
1719 return nat_rep
1721 if not isinstance(x, Timedelta):
1722 x = Timedelta(x)
1723 result = x._repr_base(format=format)
1724 if box:
1725 result = "'{res}'".format(res=result)
1726 return result
1728 return _formatter
def _make_fixed_width(
    strings: List[str],
    justify: str = "right",
    minimum: Optional[int] = None,
    adj: Optional[TextAdjustment] = None,
) -> List[str]:
    """
    Pad (and, if necessary, truncate) ``strings`` to a common width.

    The width is the longest string, raised to ``minimum`` when given and
    capped by the ``display.max_colwidth`` option. Strings exceeding the
    width are cut and suffixed with "..." when the cap is greater than 3.
    Empty input, or ``justify == "all"``, is returned unchanged.
    """
    if len(strings) == 0 or justify == "all":
        return strings

    if adj is None:
        adj = _get_adjustment()

    width = max(adj.len(item) for item in strings)
    if minimum is not None:
        width = max(minimum, width)

    limit = get_option("display.max_colwidth")
    if limit is not None and width > limit:
        width = limit

    # Truncation only applies when there is room for the ellipsis.
    can_truncate = limit is not None and limit > 3

    def _clip(text):
        if can_truncate and adj.len(text) > width:
            return text[: width - 3] + "..."
        return text

    clipped = [_clip(item) for item in strings]
    return adj.justify(clipped, width, mode=justify)
def _trim_zeros_complex(str_complexes: np.ndarray, na_rep: str = "NaN") -> List[str]:
    """
    Trim zeros from formatted complex numbers.

    Each string is split on "j", "+" and "-" (the separators are kept),
    the pieces are passed through _trim_zeros_float and joined back.
    """
    trimmed = []
    for text in str_complexes:
        pieces = re.split(r"([j+-])", text)
        trimmed.append("".join(_trim_zeros_float(pieces, na_rep)))
    return trimmed
1775def _trim_zeros_float(
1776 str_floats: Union[np.ndarray, List[str]], na_rep: str = "NaN"
1777) -> List[str]:
1778 """
1779 Trims zeros, leaving just one before the decimal points if need be.
1780 """
1781 trimmed = str_floats
1783 def _is_number(x):
1784 return x != na_rep and not x.endswith("inf")
1786 def _cond(values):
1787 finite = [x for x in values if _is_number(x)]
1788 return (
1789 len(finite) > 0
1790 and all(x.endswith("0") for x in finite)
1791 and not (any(("e" in x) or ("E" in x) for x in finite))
1792 )
1794 while _cond(trimmed):
1795 trimmed = [x[:-1] if _is_number(x) else x for x in trimmed]
1797 # leave one 0 after the decimal points if need be.
1798 return [x + "0" if x.endswith(".") and _is_number(x) else x for x in trimmed]
1801def _has_names(index: Index) -> bool:
1802 if isinstance(index, ABCMultiIndex):
1803 return com.any_not_none(*index.names)
1804 else:
1805 return index.name is not None
class EngFormatter:
    """
    Formats float values according to engineering format.

    Based on matplotlib.ticker.EngFormatter
    """

    # The SI engineering prefixes
    ENG_PREFIXES = {
        -24: "y",
        -21: "z",
        -18: "a",
        -15: "f",
        -12: "p",
        -9: "n",
        -6: "u",
        -3: "m",
        0: "",
        3: "k",
        6: "M",
        9: "G",
        12: "T",
        15: "P",
        18: "E",
        21: "Z",
        24: "Y",
    }

    def __init__(self, accuracy: Optional[int] = None, use_eng_prefix: bool = False):
        # accuracy: number of digits after the decimal point (None -> "g").
        # use_eng_prefix: append an SI prefix letter instead of "E+XX".
        self.accuracy = accuracy
        self.use_eng_prefix = use_eng_prefix

    def __call__(self, num: Union[int, float]) -> str:
        """ Formats a number in engineering notation, appending a letter
        representing the power of 1000 of the original number. Some examples:

        >>> EngFormatter(accuracy=0, use_eng_prefix=True)(0)
        ' 0'

        >>> EngFormatter(accuracy=1, use_eng_prefix=True)(1000000)
        ' 1.0M'

        >>> EngFormatter(accuracy=2, use_eng_prefix=False)("-1e-6")
        '-1.00E-06'

        NaN is rendered as "NaN"; infinities as "inf" / "-inf".

        @param num: the value to represent
        @type num: either a numeric value or a string that can be converted to
                   a numeric value (as per decimal.Decimal constructor)

        @return: engineering formatted string
        """
        dnum = decimal.Decimal(str(num))

        if dnum.is_nan():
            return "NaN"

        if dnum.is_infinite():
            # Preserve the sign so that -inf is not displayed as +inf.
            return "-inf" if dnum < 0 else "inf"

        sign = 1

        if dnum < 0:  # pragma: no cover
            sign = -1
            dnum = -dnum

        # Largest multiple of 3 not exceeding the decimal exponent.
        if dnum != 0:
            pow10 = decimal.Decimal(int(math.floor(dnum.log10() / 3) * 3))
        else:
            pow10 = decimal.Decimal(0)

        # Clamp the exponent to the range covered by ENG_PREFIXES.
        pow10 = pow10.min(max(self.ENG_PREFIXES))
        pow10 = pow10.max(min(self.ENG_PREFIXES))
        int_pow10 = int(pow10)

        if self.use_eng_prefix:
            prefix = self.ENG_PREFIXES[int_pow10]
        elif int_pow10 < 0:
            prefix = "E-{pow10:02d}".format(pow10=-int_pow10)
        else:
            prefix = "E+{pow10:02d}".format(pow10=int_pow10)

        mant = sign * dnum / (10 ** pow10)

        if self.accuracy is None:  # pragma: no cover
            format_str = "{mant: g}{prefix}"
        else:
            format_str = "{{mant: .{acc:d}f}}{{prefix}}".format(acc=self.accuracy)

        return format_str.format(mant=mant, prefix=prefix)
def set_eng_float_format(accuracy: int = 3, use_eng_prefix: bool = False) -> None:
    """
    Switch the default float display of DataFrames to engineering format.

    ``accuracy`` is the number of decimal digits after the floating point.
    Sets the ``display.float_format`` option to an EngFormatter and
    ``display.column_space`` to ``max(12, accuracy + 9)``.

    See also EngFormatter.
    """
    eng_formatter = EngFormatter(accuracy, use_eng_prefix)
    set_option("display.float_format", eng_formatter)

    column_space = max(12, accuracy + 9)
    set_option("display.column_space", column_space)
1917def _binify(cols: List[int], line_width: int) -> List[int]:
1918 adjoin_width = 1
1919 bins = []
1920 curr_width = 0
1921 i_last_column = len(cols) - 1
1922 for i, w in enumerate(cols):
1923 w_adjoined = w + adjoin_width
1924 curr_width += w_adjoined
1925 if i_last_column == i:
1926 wrap = curr_width + 1 > line_width and i > 0
1927 else:
1928 wrap = curr_width + 2 > line_width and i > 0
1929 if wrap:
1930 bins.append(i)
1931 curr_width = w_adjoined
1933 bins.append(len(cols))
1934 return bins
def get_level_lengths(
    levels: Any, sentinel: Union[bool, object, str] = ""
) -> List[Dict[int, int]]:
    """For each index in each level the function returns lengths of indexes.

    Parameters
    ----------
    levels : list of lists
        List of values for each level.
    sentinel : string, optional
        Value which states that no new index starts on there.

    Returns
    -------
    Returns list of maps. For each level returns map of indexes (key is index
    in row and value is length of index).
    """
    if len(levels) == 0:
        return []

    # may_extend[pos] stays True while every level seen so far held the
    # sentinel at ``pos``; once cleared, a sentinel there starts a new run.
    may_extend = [True] * len(levels[0])

    all_lengths = []
    for level in levels:
        run_start = 0
        run_lengths = {}
        for pos, key in enumerate(level):
            if may_extend[pos] and key == sentinel:
                continue
            may_extend[pos] = False
            run_lengths[run_start] = pos - run_start
            run_start = pos

        # Close the final run of this level.
        run_lengths[run_start] = len(level) - run_start
        all_lengths.append(run_lengths)

    return all_lengths
def buffer_put_lines(buf: IO[str], lines: List[str]) -> None:
    """
    Appends lines to a buffer.

    The lines are joined with newlines and written in a single call; no
    trailing newline is added. Non-string entries are converted with
    ``str`` first.

    Parameters
    ----------
    buf
        The buffer to write to
    lines
        The lines to append.
    """
    # Coerce whenever any entry is NOT a str; ``str.join`` would otherwise
    # raise TypeError for lists containing no strings at all.
    if any(not isinstance(x, str) for x in lines):
        lines = [str(x) for x in lines]
    buf.write("\n".join(lines))