Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1""" pickle compat """ 

2import pickle 

3from typing import Any, Optional 

4import warnings 

5 

6from pandas._typing import FilePathOrBuffer 

7from pandas.compat import pickle_compat as pc 

8 

9from pandas.io.common import get_filepath_or_buffer, get_handle 

10 

11 

def to_pickle(
    obj: Any,
    filepath_or_buffer: FilePathOrBuffer,
    compression: Optional[str] = "infer",
    protocol: int = pickle.HIGHEST_PROTOCOL,
):
    """
    Pickle (serialize) object to file.

    Parameters
    ----------
    obj : any object
        Any python object.
    filepath_or_buffer : str, path object or file-like object
        File path, URL, or buffer where the pickled object will be stored.

        .. versionchanged:: 1.0.0
           Accept URL. URL has to be of S3 or GCS.

    compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}, default 'infer'
        If 'infer' and 'filepath_or_buffer' is path-like, then detect
        compression from the following extensions: '.gz', '.bz2', '.zip',
        or '.xz' (otherwise no compression). If 'infer' and
        'filepath_or_buffer' is not path-like, then use None
        (= no compression).
    protocol : int
        Int which indicates which protocol should be used by the pickler,
        default HIGHEST_PROTOCOL (see [1], paragraph 12.1.2). The possible
        values for this parameter depend on the version of Python. For Python
        2.x, possible values are 0, 1, 2. For Python>=3.0, 3 is a valid value.
        For Python >= 3.4, 4 is a valid value. A negative value for the
        protocol parameter is equivalent to setting its value to
        HIGHEST_PROTOCOL.

        .. [1] https://docs.python.org/3/library/pickle.html
        .. versionadded:: 0.21.0

    See Also
    --------
    read_pickle : Load pickled pandas object (or any object) from file.
    DataFrame.to_hdf : Write DataFrame to an HDF5 file.
    DataFrame.to_sql : Write DataFrame to a SQL database.
    DataFrame.to_parquet : Write a DataFrame to the binary parquet format.

    Examples
    --------
    >>> original_df = pd.DataFrame({"foo": range(5), "bar": range(5, 10)})
    >>> original_df
       foo  bar
    0    0    5
    1    1    6
    2    2    7
    3    3    8
    4    4    9
    >>> pd.to_pickle(original_df, "./dummy.pkl")

    >>> unpickled_df = pd.read_pickle("./dummy.pkl")
    >>> unpickled_df
       foo  bar
    0    0    5
    1    1    6
    2    2    7
    3    3    8
    4    4    9

    >>> import os
    >>> os.remove("./dummy.pkl")
    """
    # Normalise the protocol before anything is opened: if the comparison
    # raises (e.g. a non-integer protocol), no handle has been acquired yet,
    # so nothing can leak.
    if protocol < 0:
        protocol = pickle.HIGHEST_PROTOCOL

    fp_or_buf, _, compression, should_close = get_filepath_or_buffer(
        filepath_or_buffer, compression=compression, mode="wb"
    )
    # Compression can only be inferred from a string path; for any other
    # object "infer" means "no compression".
    if not isinstance(fp_or_buf, str) and compression == "infer":
        compression = None

    try:
        f, fh = get_handle(fp_or_buf, "wb", compression=compression, is_text=False)
        try:
            f.write(pickle.dumps(obj, protocol=protocol))
        finally:
            # Release handles in the same order as before (f, then fh), but
            # nested so that a failing close does not skip the later ones.
            try:
                f.close()
            finally:
                for _f in fh:
                    _f.close()
    finally:
        # Runs even when get_handle itself fails.
        # NOTE(review): should_close presumably marks a buffer that
        # get_filepath_or_buffer opened on our behalf -- confirm.
        if should_close:
            try:
                fp_or_buf.close()
            except ValueError:
                # Kept from the original: a ValueError from close() is
                # deliberately ignored (best-effort cleanup).
                pass

98 

99 

def read_pickle(
    filepath_or_buffer: FilePathOrBuffer, compression: Optional[str] = "infer"
):
    """
    Load pickled pandas object (or any object) from file.

    .. warning::

       Loading pickled data received from untrusted sources can be
       unsafe. See `here <https://docs.python.org/3/library/pickle.html>`__.

    Parameters
    ----------
    filepath_or_buffer : str, path object or file-like object
        File path, URL, or buffer where the pickled object will be loaded from.

        .. versionchanged:: 1.0.0
           Accept URL. URL is not limited to S3 and GCS.

    compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}, default 'infer'
        If 'infer' and 'filepath_or_buffer' is path-like, then detect
        compression from the following extensions: '.gz', '.bz2', '.zip',
        or '.xz' (otherwise no compression). If 'infer' and
        'filepath_or_buffer' is not path-like, then use None
        (= no decompression).

    Returns
    -------
    unpickled : same type as object stored in file

    See Also
    --------
    DataFrame.to_pickle : Pickle (serialize) DataFrame object to file.
    Series.to_pickle : Pickle (serialize) Series object to file.
    read_hdf : Read HDF5 file into a DataFrame.
    read_sql : Read SQL query or database table into a DataFrame.
    read_parquet : Load a parquet object, returning a DataFrame.

    Notes
    -----
    read_pickle is only guaranteed to be backwards compatible to pandas 0.20.3.

    Examples
    --------
    >>> original_df = pd.DataFrame({"foo": range(5), "bar": range(5, 10)})
    >>> original_df
       foo  bar
    0    0    5
    1    1    6
    2    2    7
    3    3    8
    4    4    9
    >>> pd.to_pickle(original_df, "./dummy.pkl")

    >>> unpickled_df = pd.read_pickle("./dummy.pkl")
    >>> unpickled_df
       foo  bar
    0    0    5
    1    1    6
    2    2    7
    3    3    8
    4    4    9

    >>> import os
    >>> os.remove("./dummy.pkl")
    """
    fp_or_buf, _, compression, should_close = get_filepath_or_buffer(
        filepath_or_buffer, compression=compression
    )
    # Compression can only be inferred from a string path; for any other
    # object "infer" means "no decompression".
    if not isinstance(fp_or_buf, str) and compression == "infer":
        compression = None

    try:
        f, fh = get_handle(fp_or_buf, "rb", compression=compression, is_text=False)

        # 1) try standard library Pickle
        # 2) try pickle_compat (older pandas version) to handle subclass changes
        # 3) try pickle_compat with latin-1 encoding upon a UnicodeDecodeError
        #
        # NOTE(review): the fallbacks reuse the same handle ``f`` after a
        # partial read; presumably pc.load rewinds it -- confirm.
        # SECURITY: unpickling executes arbitrary code from the payload; only
        # use on trusted input (see the warning in the docstring).
        try:
            excs_to_catch = (AttributeError, ImportError, ModuleNotFoundError)
            try:
                with warnings.catch_warnings(record=True):
                    # We want to silence any warnings about, e.g. moved modules.
                    warnings.simplefilter("ignore", Warning)
                    return pickle.load(f)
            except excs_to_catch:
                # e.g.
                #  "No module named 'pandas.core.sparse.series'"
                #  "Can't get attribute '__nat_unpickle' on <module 'pandas._libs.tslib"
                return pc.load(f, encoding=None)
        except UnicodeDecodeError:
            # e.g. can occur for files written in py27; see GH#28645 and GH#31988
            return pc.load(f, encoding="latin-1")
        finally:
            # Release handles in the same order as before (f, then fh), but
            # nested so that a failing close does not skip the later ones.
            try:
                f.close()
            finally:
                for _f in fh:
                    _f.close()
    finally:
        # Runs even when get_handle itself fails.
        # NOTE(review): should_close presumably marks a buffer that
        # get_filepath_or_buffer opened on our behalf -- confirm.
        if should_close:
            try:
                fp_or_buf.close()
            except ValueError:
                # Kept from the original: a ValueError from close() is
                # deliberately ignored (best-effort cleanup).
                pass