Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1from datetime import time 

2 

3import numpy as np 

4 

5from pandas.compat._optional import import_optional_dependency 

6 

7from pandas.io.excel._base import _BaseExcelReader 

8 

9 

class _XlrdReader(_BaseExcelReader):
    """Excel reader that delegates workbook access to the xlrd package."""

    def __init__(self, filepath_or_buffer):
        """Reader using xlrd engine.

        Parameters
        ----------
        filepath_or_buffer : string, path object or Workbook
            Object to be parsed.
        """
        err_msg = "Install xlrd >= 1.0.0 for Excel support"
        # Check that xlrd is importable before the base class touches the file.
        import_optional_dependency("xlrd", extra=err_msg)
        super().__init__(filepath_or_buffer)

    @property
    def _workbook_class(self):
        """Return the xlrd workbook type (``xlrd.Book``)."""
        from xlrd import Book

        return Book

    def load_workbook(self, filepath_or_buffer):
        """Open a workbook with ``xlrd.open_workbook``.

        Parameters
        ----------
        filepath_or_buffer : object
            If it has a ``read`` attribute, its full contents are read and
            handed to xlrd as ``file_contents``; otherwise it is passed to
            ``open_workbook`` unchanged.

        Returns
        -------
        The object returned by ``xlrd.open_workbook``.
        """
        from xlrd import open_workbook

        if hasattr(filepath_or_buffer, "read"):
            data = filepath_or_buffer.read()
            return open_workbook(file_contents=data)
        else:
            return open_workbook(filepath_or_buffer)

    @property
    def sheet_names(self):
        """Return ``self.book.sheet_names()``."""
        return self.book.sheet_names()

    def get_sheet_by_name(self, name):
        """Return the sheet called ``name`` via ``self.book.sheet_by_name``."""
        return self.book.sheet_by_name(name)

    def get_sheet_by_index(self, index):
        """Return the sheet at ``index`` via ``self.book.sheet_by_index``."""
        return self.book.sheet_by_index(index)

    def get_sheet_data(self, sheet, convert_float):
        """Convert every cell of ``sheet`` into a Python-level value.

        Parameters
        ----------
        sheet : xlrd sheet
            Must provide ``nrows``, ``row_values(i)`` and ``row_types(i)``.
        convert_float : bool
            If truthy, numeric cells whose value is integral are returned
            as ``int`` instead of ``float``.

        Returns
        -------
        list of list
            One inner list per row, holding the parsed cell values.
        """
        # Function-scope import, in line with the local xlrd imports below.
        from math import isfinite

        from xlrd import (
            xldate,
            XL_CELL_DATE,
            XL_CELL_ERROR,
            XL_CELL_BOOLEAN,
            XL_CELL_NUMBER,
        )

        # Used as a truthy flag below: set when the workbook is 1904-based.
        epoch1904 = self.book.datemode

        def _parse_cell(cell_contents, cell_typ):
            """converts the contents of the cell into a pandas
            appropriate object"""

            if cell_typ == XL_CELL_DATE:

                # Use the newer xlrd datetime handling.
                try:
                    cell_contents = xldate.xldate_as_datetime(cell_contents, epoch1904)
                except OverflowError:
                    # Not representable as a datetime: hand back the raw value.
                    return cell_contents

                # Excel doesn't distinguish between dates and time,
                # so we treat dates on the epoch as times only.
                # Also, Excel supports 1900 and 1904 epochs.
                year = (cell_contents.timetuple())[0:3]
                if (not epoch1904 and year == (1899, 12, 31)) or (
                    epoch1904 and year == (1904, 1, 1)
                ):
                    cell_contents = time(
                        cell_contents.hour,
                        cell_contents.minute,
                        cell_contents.second,
                        cell_contents.microsecond,
                    )

            elif cell_typ == XL_CELL_ERROR:
                cell_contents = np.nan
            elif cell_typ == XL_CELL_BOOLEAN:
                cell_contents = bool(cell_contents)
            elif convert_float and cell_typ == XL_CELL_NUMBER:
                # GH5394 - Excel 'numbers' are always floats
                # it's a minimal perf hit and less surprising
                # int() raises ValueError on NaN and OverflowError on +/-inf,
                # so non-finite values are left untouched as floats.
                if isfinite(cell_contents):
                    val = int(cell_contents)
                    if val == cell_contents:
                        cell_contents = val
            return cell_contents

        return [
            [
                _parse_cell(value, typ)
                for value, typ in zip(sheet.row_values(i), sheet.row_types(i))
            ]
            for i in range(sheet.nrows)
        ]