Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1import abc 

2from datetime import date, datetime, timedelta 

3from io import BytesIO 

4import os 

5from textwrap import fill 

6 

7from pandas._config import config 

8 

9from pandas._libs.parsers import STR_NA_VALUES 

10from pandas.errors import EmptyDataError 

11from pandas.util._decorators import Appender 

12 

13from pandas.core.dtypes.common import is_bool, is_float, is_integer, is_list_like 

14 

15from pandas.core.frame import DataFrame 

16 

17from pandas.io.common import ( 

18 get_filepath_or_buffer, 

19 is_url, 

20 stringify_path, 

21 urlopen, 

22 validate_header_arg, 

23) 

24from pandas.io.excel._util import ( 

25 _fill_mi_header, 

26 _get_default_writer, 

27 _maybe_convert_usecols, 

28 _pop_header_name, 

29 get_writer, 

30) 

31from pandas.io.formats.printing import pprint_thing 

32from pandas.io.parsers import TextParser 

33 

34_read_excel_doc = ( 

35 """ 

36Read an Excel file into a pandas DataFrame. 

37 

38Supports `xls`, `xlsx`, `xlsm`, `xlsb`, and `odf` file extensions 

39read from a local filesystem or URL. Supports an option to read 

40a single sheet or a list of sheets. 

41 

42Parameters 

43---------- 

44io : str, bytes, ExcelFile, xlrd.Book, path object, or file-like object 

45 Any valid string path is acceptable. The string could be a URL. Valid 

46 URL schemes include http, ftp, s3, and file. For file URLs, a host is 

47 expected. A local file could be: ``file://localhost/path/to/table.xlsx``. 

48 

49 If you want to pass in a path object, pandas accepts any ``os.PathLike``. 

50 

51 By file-like object, we refer to objects with a ``read()`` method, 

52 such as a file handler (e.g. via builtin ``open`` function) 

53 or ``StringIO``. 

54sheet_name : str, int, list, or None, default 0 

55 Strings are used for sheet names. Integers are used in zero-indexed 

56 sheet positions. Lists of strings/integers are used to request 

57 multiple sheets. Specify None to get all sheets. 

58 

59 Available cases: 

60 

61 * Defaults to ``0``: 1st sheet as a `DataFrame` 

62 * ``1``: 2nd sheet as a `DataFrame` 

63 * ``"Sheet1"``: Load sheet with name "Sheet1" 

64 * ``[0, 1, "Sheet5"]``: Load first, second and sheet named "Sheet5" 

65 as a dict of `DataFrame` 

66 * None: All sheets. 

67 

68header : int, list of int, default 0 

69 Row (0-indexed) to use for the column labels of the parsed 

70 DataFrame. If a list of integers is passed those row positions will 

71 be combined into a ``MultiIndex``. Use None if there is no header. 

72names : array-like, default None 

73 List of column names to use. If file contains no header row, 

74 then you should explicitly pass header=None. 

75index_col : int, list of int, default None 

76 Column (0-indexed) to use as the row labels of the DataFrame. 

77 Pass None if there is no such column. If a list is passed, 

78 those columns will be combined into a ``MultiIndex``. If a 

79 subset of data is selected with ``usecols``, index_col 

80 is based on the subset. 

usecols : int, str, list-like, or callable, default None 

82 * If None, then parse all columns. 

83 * If str, then indicates comma separated list of Excel column letters 

84 and column ranges (e.g. "A:E" or "A,C,E:F"). Ranges are inclusive of 

85 both sides. 

86 * If list of int, then indicates list of column numbers to be parsed. 

87 * If list of string, then indicates list of column names to be parsed. 

88 

89 .. versionadded:: 0.24.0 

90 

91 * If callable, then evaluate each column name against it and parse the 

92 column if the callable returns ``True``. 

93 

94 Returns a subset of the columns according to behavior above. 

95 

96 .. versionadded:: 0.24.0 

97 

98squeeze : bool, default False 

99 If the parsed data only contains one column then return a Series. 

100dtype : Type name or dict of column -> type, default None 

101 Data type for data or columns. E.g. {'a': np.float64, 'b': np.int32} 

102 Use `object` to preserve data as stored in Excel and not interpret dtype. 

103 If converters are specified, they will be applied INSTEAD 

104 of dtype conversion. 

105engine : str, default None 

106 If io is not a buffer or path, this must be set to identify io. 

107 Acceptable values are None, "xlrd", "openpyxl" or "odf". 

108converters : dict, default None 

109 Dict of functions for converting values in certain columns. Keys can 

110 either be integers or column labels, values are functions that take one 

111 input argument, the Excel cell content, and return the transformed 

112 content. 

113true_values : list, default None 

114 Values to consider as True. 

115false_values : list, default None 

116 Values to consider as False. 

117skiprows : list-like 

118 Rows to skip at the beginning (0-indexed). 

119nrows : int, default None 

120 Number of rows to parse. 

121 

122 .. versionadded:: 0.23.0 

123 

124na_values : scalar, str, list-like, or dict, default None 

125 Additional strings to recognize as NA/NaN. If dict passed, specific 

126 per-column NA values. By default the following values are interpreted 

127 as NaN: '""" 

128 + fill("', '".join(sorted(STR_NA_VALUES)), 70, subsequent_indent=" ") 

129 + """'. 

130keep_default_na : bool, default True 

131 Whether or not to include the default NaN values when parsing the data. 

132 Depending on whether `na_values` is passed in, the behavior is as follows: 

133 

134 * If `keep_default_na` is True, and `na_values` are specified, `na_values` 

135 is appended to the default NaN values used for parsing. 

136 * If `keep_default_na` is True, and `na_values` are not specified, only 

137 the default NaN values are used for parsing. 

138 * If `keep_default_na` is False, and `na_values` are specified, only 

139 the NaN values specified `na_values` are used for parsing. 

140 * If `keep_default_na` is False, and `na_values` are not specified, no 

141 strings will be parsed as NaN. 

142 

143 Note that if `na_filter` is passed in as False, the `keep_default_na` and 

144 `na_values` parameters will be ignored. 

145na_filter : bool, default True 

146 Detect missing value markers (empty strings and the value of na_values). In 

147 data without any NAs, passing na_filter=False can improve the performance 

148 of reading a large file. 

149verbose : bool, default False 

150 Indicate number of NA values placed in non-numeric columns. 

151parse_dates : bool, list-like, or dict, default False 

152 The behavior is as follows: 

153 

154 * bool. If True -> try parsing the index. 

155 * list of int or names. e.g. If [1, 2, 3] -> try parsing columns 1, 2, 3 

156 each as a separate date column. 

157 * list of lists. e.g. If [[1, 3]] -> combine columns 1 and 3 and parse as 

158 a single date column. 

159 * dict, e.g. {'foo' : [1, 3]} -> parse columns 1, 3 as date and call 

160 result 'foo' 

161 

162 If a column or index contains an unparseable date, the entire column or 

    index will be returned unaltered as an object data type. If you don't want to 

164 parse some cells as date just change their type in Excel to "Text". 

165 For non-standard datetime parsing, use ``pd.to_datetime`` after ``pd.read_excel``. 

166 

167 Note: A fast-path exists for iso8601-formatted dates. 

168date_parser : function, optional 

169 Function to use for converting a sequence of string columns to an array of 

170 datetime instances. The default uses ``dateutil.parser.parser`` to do the 

171 conversion. Pandas will try to call `date_parser` in three different ways, 

172 advancing to the next if an exception occurs: 1) Pass one or more arrays 

173 (as defined by `parse_dates`) as arguments; 2) concatenate (row-wise) the 

174 string values from the columns defined by `parse_dates` into a single array 

175 and pass that; and 3) call `date_parser` once for each row using one or 

176 more strings (corresponding to the columns defined by `parse_dates`) as 

177 arguments. 

178thousands : str, default None 

179 Thousands separator for parsing string columns to numeric. Note that 

180 this parameter is only necessary for columns stored as TEXT in Excel, 

181 any numeric columns will automatically be parsed, regardless of display 

182 format. 

183comment : str, default None 

184 Comments out remainder of line. Pass a character or characters to this 

185 argument to indicate comments in the input file. Any data between the 

186 comment string and the end of the current line is ignored. 

187skipfooter : int, default 0 

188 Rows at the end to skip (0-indexed). 

189convert_float : bool, default True 

190 Convert integral floats to int (i.e., 1.0 --> 1). If False, all numeric 

191 data will be read in as floats: Excel stores all numbers as floats 

192 internally. 

193mangle_dupe_cols : bool, default True 

194 Duplicate columns will be specified as 'X', 'X.1', ...'X.N', rather than 

195 'X'...'X'. Passing in False will cause data to be overwritten if there 

196 are duplicate names in the columns. 

197**kwds : optional 

198 Optional keyword arguments can be passed to ``TextFileReader``. 

199 

200Returns 

201------- 

202DataFrame or dict of DataFrames 

203 DataFrame from the passed in Excel file. See notes in sheet_name 

204 argument for more information on when a dict of DataFrames is returned. 

205 

206See Also 

207-------- 

208to_excel : Write DataFrame to an Excel file. 

209to_csv : Write DataFrame to a comma-separated values (csv) file. 

210read_csv : Read a comma-separated values (csv) file into DataFrame. 

211read_fwf : Read a table of fixed-width formatted lines into DataFrame. 

212 

213Examples 

214-------- 

215The file can be read using the file name as string or an open file object: 

216 

217>>> pd.read_excel('tmp.xlsx', index_col=0) # doctest: +SKIP 

218 Name Value 

2190 string1 1 

2201 string2 2 

2212 #Comment 3 

222 

223>>> pd.read_excel(open('tmp.xlsx', 'rb'), 

224... sheet_name='Sheet3') # doctest: +SKIP 

225 Unnamed: 0 Name Value 

2260 0 string1 1 

2271 1 string2 2 

2282 2 #Comment 3 

229 

230Index and header can be specified via the `index_col` and `header` arguments 

231 

232>>> pd.read_excel('tmp.xlsx', index_col=None, header=None) # doctest: +SKIP 

233 0 1 2 

2340 NaN Name Value 

2351 0.0 string1 1 

2362 1.0 string2 2 

2373 2.0 #Comment 3 

238 

239Column types are inferred but can be explicitly specified 

240 

241>>> pd.read_excel('tmp.xlsx', index_col=0, 

242... dtype={'Name': str, 'Value': float}) # doctest: +SKIP 

243 Name Value 

2440 string1 1.0 

2451 string2 2.0 

2462 #Comment 3.0 

247 

248True, False, and NA values, and thousands separators have defaults, 

249but can be explicitly specified, too. Supply the values you would like 

250as strings or lists of strings! 

251 

252>>> pd.read_excel('tmp.xlsx', index_col=0, 

253... na_values=['string1', 'string2']) # doctest: +SKIP 

254 Name Value 

2550 NaN 1 

2561 NaN 2 

2572 #Comment 3 

258 

259Comment lines in the excel input file can be skipped using the `comment` kwarg 

260 

261>>> pd.read_excel('tmp.xlsx', index_col=0, comment='#') # doctest: +SKIP 

262 Name Value 

2630 string1 1.0 

2641 string2 2.0 

2652 None NaN 

266""" 

267) 

268 

269 

@Appender(_read_excel_doc)
def read_excel(
    io,
    sheet_name=0,
    header=0,
    names=None,
    index_col=None,
    usecols=None,
    squeeze=False,
    dtype=None,
    engine=None,
    converters=None,
    true_values=None,
    false_values=None,
    skiprows=None,
    nrows=None,
    na_values=None,
    keep_default_na=True,
    verbose=False,
    parse_dates=False,
    date_parser=None,
    thousands=None,
    comment=None,
    skipfooter=0,
    convert_float=True,
    mangle_dupe_cols=True,
    **kwds,
):
    # Full user-facing documentation is attached via @Appender(_read_excel_doc).

    # Reject keyword arguments removed in earlier pandas versions; report the
    # first offender in the historical precedence order.
    removed = next(
        (kw for kw in ("sheet", "sheetname", "parse_cols") if kw in kwds), None
    )
    if removed is not None:
        raise TypeError(
            f"read_excel() got an unexpected keyword argument `{removed}`"
        )

    # An already-constructed ExcelFile carries its own engine, so supplying a
    # conflicting one is an error; anything else gets wrapped in an ExcelFile.
    if isinstance(io, ExcelFile):
        if engine and engine != io.engine:
            raise ValueError(
                "Engine should not be specified when passing "
                "an ExcelFile - ExcelFile already has the engine set"
            )
    else:
        io = ExcelFile(io, engine=engine)

    # Delegate all parsing work to ExcelFile.parse / the engine reader.
    return io.parse(
        sheet_name=sheet_name,
        header=header,
        names=names,
        index_col=index_col,
        usecols=usecols,
        squeeze=squeeze,
        dtype=dtype,
        converters=converters,
        true_values=true_values,
        false_values=false_values,
        skiprows=skiprows,
        nrows=nrows,
        na_values=na_values,
        keep_default_na=keep_default_na,
        verbose=verbose,
        parse_dates=parse_dates,
        date_parser=date_parser,
        thousands=thousands,
        comment=comment,
        skipfooter=skipfooter,
        convert_float=convert_float,
        mangle_dupe_cols=mangle_dupe_cols,
        **kwds,
    )

336 

337 

class _BaseExcelReader(metaclass=abc.ABCMeta):
    """
    Abstract base class for engine-specific Excel readers.

    Subclasses supply the workbook type, workbook loading, sheet lookup and
    raw cell extraction; this base class handles resolving the input
    (URL/path/buffer/bytes) into a workbook and turning raw sheet data into
    DataFrames via ``parse``.
    """

    def __init__(self, filepath_or_buffer):
        # If filepath_or_buffer is a url, load the data into a BytesIO
        if is_url(filepath_or_buffer):
            filepath_or_buffer = BytesIO(urlopen(filepath_or_buffer).read())
        elif not isinstance(filepath_or_buffer, (ExcelFile, self._workbook_class)):
            # Resolve path-likes / fsspec-style inputs to a local buffer/path.
            filepath_or_buffer, _, _, _ = get_filepath_or_buffer(filepath_or_buffer)

        if isinstance(filepath_or_buffer, self._workbook_class):
            # Caller already handed us a workbook of this engine's type.
            self.book = filepath_or_buffer
        elif hasattr(filepath_or_buffer, "read"):
            # N.B. xlrd.Book has a read attribute too
            filepath_or_buffer.seek(0)
            self.book = self.load_workbook(filepath_or_buffer)
        elif isinstance(filepath_or_buffer, str):
            self.book = self.load_workbook(filepath_or_buffer)
        elif isinstance(filepath_or_buffer, bytes):
            self.book = self.load_workbook(BytesIO(filepath_or_buffer))
        else:
            raise ValueError(
                "Must explicitly set engine if not passing in buffer or path for io."
            )

    @property
    @abc.abstractmethod
    def _workbook_class(self):
        """Engine-specific workbook type accepted/produced by this reader."""
        pass

    @abc.abstractmethod
    def load_workbook(self, filepath_or_buffer):
        """Open *filepath_or_buffer* and return an engine workbook object."""
        pass

    def close(self):
        # Default is a no-op; engines that hold resources override this.
        pass

    @property
    @abc.abstractmethod
    def sheet_names(self):
        """List of sheet names in workbook order."""
        pass

    @abc.abstractmethod
    def get_sheet_by_name(self, name):
        """Return the engine sheet object with the given name."""
        pass

    @abc.abstractmethod
    def get_sheet_by_index(self, index):
        """Return the engine sheet object at the given 0-based position."""
        pass

    @abc.abstractmethod
    def get_sheet_data(self, sheet, convert_float):
        """Return the sheet contents as a list of rows (lists of cell values)."""
        pass

    def parse(
        self,
        sheet_name=0,
        header=0,
        names=None,
        index_col=None,
        usecols=None,
        squeeze=False,
        dtype=None,
        true_values=None,
        false_values=None,
        skiprows=None,
        nrows=None,
        na_values=None,
        verbose=False,
        parse_dates=False,
        date_parser=None,
        thousands=None,
        comment=None,
        skipfooter=0,
        convert_float=True,
        mangle_dupe_cols=True,
        **kwds,
    ):
        """
        Parse one or more sheets into DataFrame(s).

        Returns a single DataFrame for a scalar ``sheet_name``, or a dict of
        DataFrames when ``sheet_name`` is a list or None (all sheets).
        See ``read_excel`` for parameter semantics.
        """

        validate_header_arg(header)

        # True when the caller asked for multiple sheets -> return a dict.
        ret_dict = False

        # Keep sheetname to maintain backwards compatibility.
        if isinstance(sheet_name, list):
            sheets = sheet_name
            ret_dict = True
        elif sheet_name is None:
            sheets = self.sheet_names
            ret_dict = True
        else:
            sheets = [sheet_name]

        # handle same-type duplicates.
        sheets = list(dict.fromkeys(sheets).keys())

        output = {}

        for asheetname in sheets:
            if verbose:
                print(f"Reading sheet {asheetname}")

            if isinstance(asheetname, str):
                sheet = self.get_sheet_by_name(asheetname)
            else:  # assume an integer if not a string
                sheet = self.get_sheet_by_index(asheetname)

            data = self.get_sheet_data(sheet, convert_float)
            usecols = _maybe_convert_usecols(usecols)

            if not data:
                # Empty sheet -> empty DataFrame.
                output[asheetname] = DataFrame()
                continue

            # A one-element header list behaves like a scalar header row.
            if is_list_like(header) and len(header) == 1:
                header = header[0]

            # forward fill and pull out names for MultiIndex column
            header_names = None
            if header is not None and is_list_like(header):
                header_names = []
                control_row = [True] * len(data[0])

                for row in header:
                    # Header row positions are relative to the post-skip data.
                    if is_integer(skiprows):
                        row += skiprows

                    data[row], control_row = _fill_mi_header(data[row], control_row)

                    if index_col is not None:
                        header_name, _ = _pop_header_name(data[row], index_col)
                        header_names.append(header_name)

            if is_list_like(index_col):
                # Forward fill values for MultiIndex index.
                if not is_list_like(header):
                    offset = 1 + header
                else:
                    offset = 1 + max(header)

                # Check if we have an empty dataset
                # before trying to collect data.
                if offset < len(data):
                    for col in index_col:
                        last = data[offset][col]

                        for row in range(offset + 1, len(data)):
                            # Blank cells inherit the value above them
                            # (merged-cell style index labels).
                            if data[row][col] == "" or data[row][col] is None:
                                data[row][col] = last
                            else:
                                last = data[row][col]

            has_index_names = is_list_like(header) and len(header) > 1

            # GH 12292 : error when read one empty column from excel file
            try:
                parser = TextParser(
                    data,
                    names=names,
                    header=header,
                    index_col=index_col,
                    has_index_names=has_index_names,
                    squeeze=squeeze,
                    dtype=dtype,
                    true_values=true_values,
                    false_values=false_values,
                    skiprows=skiprows,
                    nrows=nrows,
                    na_values=na_values,
                    parse_dates=parse_dates,
                    date_parser=date_parser,
                    thousands=thousands,
                    comment=comment,
                    skipfooter=skipfooter,
                    usecols=usecols,
                    mangle_dupe_cols=mangle_dupe_cols,
                    **kwds,
                )

                output[asheetname] = parser.read(nrows=nrows)

                if not squeeze or isinstance(output[asheetname], DataFrame):
                    if header_names:
                        output[asheetname].columns = output[
                            asheetname
                        ].columns.set_names(header_names)

            except EmptyDataError:
                # No Data, return an empty DataFrame
                output[asheetname] = DataFrame()

        if ret_dict:
            return output
        else:
            # Single requested sheet: return just that DataFrame.
            return output[asheetname]

531 

532 

class ExcelWriter(metaclass=abc.ABCMeta):
    """
    Class for writing DataFrame objects into excel sheets.

    Default is to use xlwt for xls, openpyxl for xlsx.
    See DataFrame.to_excel for typical usage.

    Parameters
    ----------
    path : str
        Path to xls or xlsx file.
    engine : str (optional)
        Engine to use for writing. If None, defaults to
        ``io.excel.<extension>.writer``. NOTE: can only be passed as a keyword
        argument.
    date_format : str, default None
        Format string for dates written into Excel files (e.g. 'YYYY-MM-DD').
    datetime_format : str, default None
        Format string for datetime objects written into Excel files.
        (e.g. 'YYYY-MM-DD HH:MM:SS').
    mode : {'w', 'a'}, default 'w'
        File mode to use (write or append).

        .. versionadded:: 0.24.0

    Attributes
    ----------
    None

    Methods
    -------
    None

    Notes
    -----
    None of the methods and properties are considered public.

    For compatibility with CSV writers, ExcelWriter serializes lists
    and dicts to strings before writing.

    Examples
    --------
    Default usage:

    >>> with ExcelWriter('path_to_file.xlsx') as writer:
    ...     df.to_excel(writer)

    To write to separate sheets in a single file:

    >>> with ExcelWriter('path_to_file.xlsx') as writer:
    ...     df1.to_excel(writer, sheet_name='Sheet1')
    ...     df2.to_excel(writer, sheet_name='Sheet2')

    You can set the date format or datetime format:

    >>> with ExcelWriter('path_to_file.xlsx',
                         date_format='YYYY-MM-DD',
                         datetime_format='YYYY-MM-DD HH:MM:SS') as writer:
    ...     df.to_excel(writer)

    You can also append to an existing Excel file:

    >>> with ExcelWriter('path_to_file.xlsx', mode='a') as writer:
    ...     df.to_excel(writer, sheet_name='Sheet3')
    """

    # Defining an ExcelWriter implementation (see abstract methods for more...)

    # - Mandatory
    #   - ``write_cells(self, cells, sheet_name=None, startrow=0, startcol=0)``
    #     --> called to write additional DataFrames to disk
    #   - ``supported_extensions`` (tuple of supported extensions), used to
    #     check that engine supports the given extension.
    #   - ``engine`` - string that gives the engine name. Necessary to
    #     instantiate class directly and bypass ``ExcelWriterMeta`` engine
    #     lookup.
    #   - ``save(self)`` --> called to save file to disk
    # - Mostly mandatory (i.e. should at least exist)
    #   - book, cur_sheet, path

    # - Optional:
    #   - ``__init__(self, path, engine=None, **kwargs)`` --> always called
    #     with path as first argument.

    # You also need to register the class with ``register_writer()``.
    # Technically, ExcelWriter implementations don't need to subclass
    # ExcelWriter.
    def __new__(cls, path, engine=None, **kwargs):
        # Only switch class when instantiating the generic ExcelWriter;
        # concrete subclasses are constructed directly.
        if cls is ExcelWriter:
            if engine is None or (isinstance(engine, str) and engine == "auto"):
                if isinstance(path, str):
                    ext = os.path.splitext(path)[-1][1:]
                else:
                    # Non-path targets default to the xlsx writer.
                    ext = "xlsx"

                try:
                    engine = config.get_option(f"io.excel.{ext}.writer")
                    if engine == "auto":
                        engine = _get_default_writer(ext)
                except KeyError as err:
                    # Chain the cause so the original option lookup failure
                    # is preserved for debugging.
                    raise ValueError(f"No engine for filetype: '{ext}'") from err
            cls = get_writer(engine)

        return object.__new__(cls)

    # declare external properties you can count on
    book = None
    # NOTE: was previously misspelled ``curr_sheet``; every use site
    # (``__init__``, ``_get_sheet_name``) reads/writes ``cur_sheet``.
    cur_sheet = None
    path = None

    @property
    @abc.abstractmethod
    def supported_extensions(self):
        """Extensions that writer engine supports."""
        pass

    @property
    @abc.abstractmethod
    def engine(self):
        """Name of engine."""
        pass

    @abc.abstractmethod
    def write_cells(
        self, cells, sheet_name=None, startrow=0, startcol=0, freeze_panes=None
    ):
        """
        Write given formatted cells into Excel an excel sheet

        Parameters
        ----------
        cells : generator
            cell of formatted data to save to Excel sheet
        sheet_name : str, default None
            Name of Excel sheet, if None, then use self.cur_sheet
        startrow : upper left cell row to dump data frame
        startcol : upper left cell column to dump data frame
        freeze_panes: int tuple of length 2
            contains the bottom-most row and right-most column to freeze
        """
        pass

    @abc.abstractmethod
    def save(self):
        """
        Save workbook to disk.
        """
        pass

    def __init__(
        self,
        path,
        engine=None,
        date_format=None,
        datetime_format=None,
        mode="w",
        **engine_kwargs,
    ):
        # validate that this engine can handle the extension
        if isinstance(path, str):
            ext = os.path.splitext(path)[-1]
        else:
            # Buffers carry no extension; assume one matching the engine.
            ext = "xls" if engine == "xlwt" else "xlsx"

        self.check_extension(ext)

        self.path = path
        self.sheets = {}
        self.cur_sheet = None

        # Fall back to ISO-style formats when none are supplied.
        if date_format is None:
            self.date_format = "YYYY-MM-DD"
        else:
            self.date_format = date_format
        if datetime_format is None:
            self.datetime_format = "YYYY-MM-DD HH:MM:SS"
        else:
            self.datetime_format = datetime_format

        self.mode = mode

    def __fspath__(self):
        # os.PathLike support: expose the target path as a string.
        return stringify_path(self.path)

    def _get_sheet_name(self, sheet_name):
        # Resolve an explicit sheet name, falling back to the current sheet.
        if sheet_name is None:
            sheet_name = self.cur_sheet
        if sheet_name is None:  # pragma: no cover
            raise ValueError("Must pass explicit sheet_name or set cur_sheet property")
        return sheet_name

    def _value_with_fmt(self, val):
        """Convert numpy types to Python types for the Excel writers.

        Parameters
        ----------
        val : object
            Value to be written into cells

        Returns
        -------
        Tuple with the first element being the converted value and the second
        being an optional format
        """
        fmt = None

        if is_integer(val):
            val = int(val)
        elif is_float(val):
            val = float(val)
        elif is_bool(val):
            val = bool(val)
        elif isinstance(val, datetime):
            fmt = self.datetime_format
        elif isinstance(val, date):
            fmt = self.date_format
        elif isinstance(val, timedelta):
            # Excel represents durations as fractional days.
            val = val.total_seconds() / float(86400)
            fmt = "0"
        else:
            # Anything unrecognized (lists, dicts, ...) is serialized to str.
            val = str(val)

        return val, fmt

    @classmethod
    def check_extension(cls, ext):
        """checks that path's extension against the Writer's supported
        extensions. If it isn't supported, raises UnsupportedFiletypeError."""
        if ext.startswith("."):
            ext = ext[1:]
        if not any(ext in extension for extension in cls.supported_extensions):
            # BUG FIX: the detail f-string used to be a separate (discarded)
            # expression statement, so the raised error lost the engine and
            # extension. Build the full message as one implicit concatenation.
            msg = (
                "Invalid extension for engine "
                f"'{pprint_thing(cls.engine)}': '{pprint_thing(ext)}'"
            )
            raise ValueError(msg)
        else:
            return True

    # Allow use as a contextmanager
    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def close(self):
        """synonym for save, to make it more file-like"""
        return self.save()

782 

783 

class ExcelFile:
    """
    Class for parsing tabular excel sheets into DataFrame objects.
    Uses xlrd. See read_excel for more documentation

    Parameters
    ----------
    io : str, path object (pathlib.Path or py._path.local.LocalPath),
        a file-like object, xlrd workbook or openpyxl workbook.
        If a string or path object, expected to be a path to xls, xlsx or odf file.
    engine : str, default None
        If io is not a buffer or path, this must be set to identify io.
        Acceptable values are None, ``xlrd``, ``openpyxl``, ``odf``, or ``pyxlsb``.
        Note that ``odf`` reads tables out of OpenDocument formatted files.
    """

    # Engine reader implementations, imported in the class body.
    from pandas.io.excel._odfreader import _ODFReader
    from pandas.io.excel._openpyxl import _OpenpyxlReader
    from pandas.io.excel._xlrd import _XlrdReader
    from pandas.io.excel._pyxlsb import _PyxlsbReader

    # Map of valid ``engine`` values to their reader classes.
    _engines = {
        "xlrd": _XlrdReader,
        "openpyxl": _OpenpyxlReader,
        "odf": _ODFReader,
        "pyxlsb": _PyxlsbReader,
    }

    def __init__(self, io, engine=None):
        # xlrd is the historical default engine.
        if engine is None:
            engine = "xlrd"
        if engine not in self._engines:
            raise ValueError(f"Unknown engine: {engine}")

        self.engine = engine
        # could be a str, ExcelFile, Book, etc.
        self.io = io
        # Always a string
        self._io = stringify_path(io)

        # Instantiate the engine-specific reader that does the actual work.
        self._reader = self._engines[engine](self._io)

    def __fspath__(self):
        # os.PathLike support: expose the underlying path string.
        return self._io

    def parse(
        self,
        sheet_name=0,
        header=0,
        names=None,
        index_col=None,
        usecols=None,
        squeeze=False,
        converters=None,
        true_values=None,
        false_values=None,
        skiprows=None,
        nrows=None,
        na_values=None,
        parse_dates=False,
        date_parser=None,
        thousands=None,
        comment=None,
        skipfooter=0,
        convert_float=True,
        mangle_dupe_cols=True,
        **kwds,
    ):
        """
        Parse specified sheet(s) into a DataFrame.

        Equivalent to read_excel(ExcelFile, ...) See the read_excel
        docstring for more info on accepted parameters.

        Returns
        -------
        DataFrame or dict of DataFrames
            DataFrame from the passed in Excel file.
        """
        # Excel parsing is eager; chunked reading is not supported.
        if "chunksize" in kwds:
            raise NotImplementedError(
                "chunksize keyword of read_excel is not implemented"
            )

        return self._reader.parse(
            sheet_name=sheet_name,
            header=header,
            names=names,
            index_col=index_col,
            usecols=usecols,
            squeeze=squeeze,
            converters=converters,
            true_values=true_values,
            false_values=false_values,
            skiprows=skiprows,
            nrows=nrows,
            na_values=na_values,
            parse_dates=parse_dates,
            date_parser=date_parser,
            thousands=thousands,
            comment=comment,
            skipfooter=skipfooter,
            convert_float=convert_float,
            mangle_dupe_cols=mangle_dupe_cols,
            **kwds,
        )

    @property
    def book(self):
        # Underlying engine workbook object.
        return self._reader.book

    @property
    def sheet_names(self):
        # Sheet names as reported by the engine reader.
        return self._reader.sheet_names

    def close(self):
        """close io if necessary"""
        self._reader.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def __del__(self):
        # Ensure we don't leak file descriptors, but put in try/except in case
        # attributes are already deleted
        try:
            self.close()
        except AttributeError:
            pass