Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1""" implement the TimedeltaIndex """ 

2from datetime import datetime 

3 

4import numpy as np 

5 

6from pandas._libs import NaT, Timedelta, index as libindex 

7from pandas.util._decorators import Appender, Substitution 

8 

9from pandas.core.dtypes.common import ( 

10 _TD_DTYPE, 

11 is_float, 

12 is_integer, 

13 is_list_like, 

14 is_scalar, 

15 is_timedelta64_dtype, 

16 is_timedelta64_ns_dtype, 

17 pandas_dtype, 

18) 

19from pandas.core.dtypes.missing import is_valid_nat_for_dtype, isna 

20 

21from pandas.core.accessor import delegate_names 

22from pandas.core.arrays import datetimelike as dtl 

23from pandas.core.arrays.timedeltas import TimedeltaArray, _is_convertible_to_td 

24from pandas.core.base import _shared_docs 

25import pandas.core.common as com 

26from pandas.core.indexes.base import Index, _index_shared_docs, maybe_extract_name 

27from pandas.core.indexes.datetimelike import ( 

28 DatetimeIndexOpsMixin, 

29 DatetimelikeDelegateMixin, 

30 DatetimeTimedeltaMixin, 

31) 

32from pandas.core.indexes.extension import inherit_names 

33 

34from pandas.tseries.frequencies import to_offset 

35 

36 

class TimedeltaDelegateMixin(DatetimelikeDelegateMixin):
    """
    Name tables describing which TimedeltaArray attributes get delegated.

    The two ``_delegated_*`` lists are consumed by the ``delegate_names``
    decorators applied to ``TimedeltaIndex``.
    """

    # The bulk of the delegated names come from
    # TimedeltaArray._datetimelike_{ops,methods}.
    # The "raw" names listed here return their result as-is, i.e. it is
    # not re-boxed in an Index.
    # A few of them are "extra" attrs (raw or not) that we do not want to
    # expose in the .dt accessor.
    _raw_methods = {"to_pytimedelta", "sum", "std", "median", "_format_native_types"}
    _raw_properties = {"components", "_box_func"}

    _delegated_methods = [*TimedeltaArray._datetimelike_methods, *_raw_methods]
    _delegated_properties = [*TimedeltaArray._datetimelike_ops, *_raw_properties]

47 

48 

@inherit_names(
    ["_box_values", "__neg__", "__pos__", "__abs__"], TimedeltaArray, wrap=True
)
@inherit_names(
    [
        "_bool_ops",
        "_object_ops",
        "_field_ops",
        "_datetimelike_ops",
        "_datetimelike_methods",
        "_other_ops",
    ],
    TimedeltaArray,
)
@delegate_names(
    TimedeltaArray, TimedeltaDelegateMixin._delegated_properties, typ="property"
)
@delegate_names(
    TimedeltaArray,
    TimedeltaDelegateMixin._delegated_methods,
    typ="method",
    overwrite=True,
)
class TimedeltaIndex(
    DatetimeTimedeltaMixin, dtl.TimelikeOps, TimedeltaDelegateMixin,
):
    """
    Immutable ndarray of timedelta64 data, represented internally as int64, and
    which can be boxed to timedelta objects.

    Parameters
    ----------
    data  : array-like (1-dimensional), optional
        Optional timedelta-like data to construct index with.
    unit : str, optional
        Unit of ``data`` when it holds integer/float numbers; one of
        (D, h, m, s, ms, us, ns). The units 'Y', 'y' and 'M' are rejected.
    freq : str or pandas offset object, optional
        One of pandas date offset strings or corresponding objects. The string
        'infer' can be passed in order to set the frequency of the index as the
        inferred frequency upon creation.
    closed : object, optional
        Accepted by the constructor but not used by it.
    dtype : numpy.dtype or str, default timedelta64[ns]
        Forwarded to ``TimedeltaArray._from_sequence`` when ``data`` has to be
        converted.
    copy  : bool
        Make a copy of input ndarray.
    name : object
        Name to be stored in the index.

    Attributes
    ----------
    days
    seconds
    microseconds
    nanoseconds
    components
    inferred_freq

    Methods
    -------
    to_pytimedelta
    to_series
    round
    floor
    ceil
    to_frame
    mean

    See Also
    --------
    Index : The base pandas Index type.
    Timedelta : Represents a duration between two dates or times.
    DatetimeIndex : Index of datetime64 data.
    PeriodIndex : Index of Period data.
    timedelta_range : Create a fixed-frequency TimedeltaIndex.

    Notes
    -----
    To learn more about the frequency strings, please see `this link
    <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__.
    """

    _typ = "timedeltaindex"

    _engine_type = libindex.TimedeltaEngine

    _comparables = ["name", "freq"]
    _attributes = ["name", "freq"]
    _is_numeric_dtype = True
    _infer_as_myclass = True

    # -------------------------------------------------------------------
    # Constructors

    def __new__(
        cls,
        data=None,
        unit=None,
        freq=None,
        closed=None,
        dtype=_TD_DTYPE,
        copy=False,
        name=None,
    ):
        # NOTE: ``closed`` is part of the signature but is never read below.
        name = maybe_extract_name(name, data, cls)

        if is_scalar(data):
            raise TypeError(
                f"{cls.__name__}() must be called with a "
                f"collection of some kind, {repr(data)} was passed"
            )

        if unit in {"Y", "y", "M"}:
            raise ValueError(
                "Units 'M' and 'Y' are no longer supported, as they do not "
                "represent unambiguous timedelta values durations."
            )

        if isinstance(data, TimedeltaArray):
            # Already the right array type: wrap it directly, copying only
            # on request.
            if copy:
                data = data.copy()
            return cls._simple_new(data, name=name, freq=freq)

        if isinstance(data, TimedeltaIndex) and freq is None and name is None:
            # Nothing to override on an existing index: return a (shallow)
            # copy of it.
            if copy:
                return data.copy()
            else:
                return data._shallow_copy()

        # - Cases checked above all return/raise before reaching here - #

        tdarr = TimedeltaArray._from_sequence(
            data, freq=freq, unit=unit, dtype=dtype, copy=copy
        )
        return cls._simple_new(tdarr._data, freq=tdarr.freq, name=name)

    @classmethod
    def _simple_new(cls, values, name=None, freq=None, dtype=_TD_DTYPE):
        """
        Build an index around ``values`` without going through ``__new__``.

        Parameters
        ----------
        values : TimedeltaArray or object accepted by TimedeltaArray._simple_new
        name : object, optional
            Stored as the name of the result.
        freq : optional
            When None and ``values`` is a TimedeltaArray, ``values.freq`` is
            used instead.
        dtype : default timedelta64[ns]
            Asserted to equal ``_TD_DTYPE``.

        Returns
        -------
        TimedeltaIndex
        """
        # `dtype` is passed by _shallow_copy in corner cases, should always
        #  be timedelta64[ns] if present
        if not isinstance(values, TimedeltaArray):
            values = TimedeltaArray._simple_new(values, dtype=dtype, freq=freq)
        else:
            if freq is None:
                freq = values.freq
        assert isinstance(values, TimedeltaArray), type(values)
        assert dtype == _TD_DTYPE, dtype
        assert values.dtype == "m8[ns]", values.dtype

        tdarr = TimedeltaArray._simple_new(values._data, freq=freq)
        result = object.__new__(cls)
        result._data = tdarr
        result._name = name
        # For groupby perf. See note in indexes/base about _index_data
        result._index_data = tdarr._data

        result._reset_identity()
        return result

    # -------------------------------------------------------------------
    # Rendering Methods

    @property
    def _formatter_func(self):
        """
        Formatter obtained from ``_get_format_timedelta64(self, box=True)``.
        """
        # Imported here rather than at module level.
        from pandas.io.formats.format import _get_format_timedelta64

        return _get_format_timedelta64(self, box=True)

    # -------------------------------------------------------------------

    # No docstring here: the Appender decorator supplies it.
    @Appender(_index_shared_docs["astype"])
    def astype(self, dtype, copy=True):
        dtype = pandas_dtype(dtype)
        if is_timedelta64_dtype(dtype) and not is_timedelta64_ns_dtype(dtype):
            # Have to repeat the check for 'timedelta64' (not ns) dtype
            #  so that we can return a numeric index, since pandas will return
            #  a TimedeltaIndex when dtype='timedelta'
            result = self._data.astype(dtype, copy=copy)
            if self.hasnans:
                return Index(result, name=self.name)
            return Index(result.astype("i8"), name=self.name)
        return DatetimeIndexOpsMixin.astype(self, dtype, copy=copy)

    def _maybe_promote(self, other):
        """
        Convert ``other`` to a TimedeltaIndex when its inferred type is
        "timedelta"; return the pair ``(self, other)``.
        """
        if other.inferred_type == "timedelta":
            other = TimedeltaIndex(other)
        return self, other

    def get_value(self, series, key):
        """
        Fast lookup of value from 1-dimensional ndarray. Only use this if you
        know what you're doing

        Parameters
        ----------
        series : object
            Container the value is taken from.
        key : object
            Label to look up. Keys convertible to Timedelta are converted
            first.

        Raises
        ------
        KeyError
            If none of the lookup strategies below succeeds.
        """

        if _is_convertible_to_td(key):
            key = Timedelta(key)
            return self.get_value_maybe_box(series, key)

        try:
            value = Index.get_value(self, series, key)
        except KeyError:
            # First fallback: treat the key as a (partial) slice.
            try:
                loc = self._get_string_slice(key)
                return series[loc]
            except (TypeError, ValueError, KeyError):
                pass

            # Second fallback: ask the engine directly.
            try:
                return self.get_value_maybe_box(series, key)
            except (TypeError, ValueError, KeyError) as err:
                raise KeyError(key) from err
        else:
            return com.maybe_box(self, value, series, key)

    def get_value_maybe_box(self, series, key: Timedelta):
        """
        Look ``key`` up through the index engine and box the result with
        ``com.maybe_box``.
        """
        values = self._engine.get_value(com.values_from_object(series), key)
        return com.maybe_box(self, values, series, key)

    def get_loc(self, key, method=None, tolerance=None):
        """
        Get integer location for requested label

        Parameters
        ----------
        key : object
            Label to locate. NA-like keys are treated as NaT.
        method : optional
            Passed through to ``Index.get_loc``.
        tolerance : optional
            Converted with ``_convert_tolerance`` and passed through to
            ``Index.get_loc``.

        Returns
        -------
        loc : int

        Raises
        ------
        TypeError
            If ``key`` is list-like or a datetime object other than NaT.
        KeyError
            If the final lookup fails with a KeyError or ValueError.
        """
        if is_list_like(key) or (isinstance(key, datetime) and key is not NaT):
            # GH#20464 datetime check here is to ensure we don't allow
            #   datetime objects to be incorrectly treated as timedelta
            #   objects; NaT is a special case because it plays a double role
            #   as Not-A-Timedelta
            raise TypeError(
                f"cannot use {type(key).__name__} as a {type(self).__name__} label"
            )

        if isna(key):
            key = NaT

        if tolerance is not None:
            # try converting tolerance now, so errors don't get swallowed by
            # the try/except clauses below
            tolerance = self._convert_tolerance(tolerance, np.asarray(key))

        if _is_convertible_to_td(key) or key is NaT:
            key = Timedelta(key)
            return Index.get_loc(self, key, method, tolerance)

        try:
            return Index.get_loc(self, key, method, tolerance)
        except (KeyError, ValueError, TypeError):
            try:
                return self._get_string_slice(key)
            except (TypeError, KeyError, ValueError):
                pass

            try:
                stamp = Timedelta(key)
                return Index.get_loc(self, stamp, method, tolerance)
            except (KeyError, ValueError) as err:
                raise KeyError(key) from err

    def _maybe_cast_slice_bound(self, label, side, kind):
        """
        If label is a string, cast it to timedelta according to resolution.

        Parameters
        ----------
        label : object
        side : {'left', 'right'}
        kind : {'ix', 'loc', 'getitem'} or None

        Returns
        -------
        label : object
            For a string label: the parsed Timedelta rounded to its own
            resolution (left side), or the last nanosecond of that resolution
            window (any other side). Other labels are returned unchanged,
            after integers and floats have been passed to
            ``_invalid_indexer``.
        """
        assert kind in ["ix", "loc", "getitem", None]

        if isinstance(label, str):
            parsed = Timedelta(label)
            lbound = parsed.round(parsed.resolution_string)
            if side == "left":
                return lbound
            else:
                return lbound + to_offset(parsed.resolution_string) - Timedelta(1, "ns")
        elif is_integer(label) or is_float(label):
            self._invalid_indexer("slice", label)

        return label

    def _get_string_slice(self, key):
        """
        Resolve ``key`` through ``_partial_td_slice``.

        Integers, floats and NaT are passed to ``_invalid_indexer`` first.
        """
        if is_integer(key) or is_float(key) or key is NaT:
            self._invalid_indexer("slice", key)
        loc = self._partial_td_slice(key)
        return loc

    def _partial_td_slice(self, key):
        """
        Return non-string keys unchanged.

        Raises
        ------
        NotImplementedError
            If ``key`` is a string.
        """

        # given a key, try to figure out a location for a partial slice
        if not isinstance(key, str):
            return key

        raise NotImplementedError

    # No docstring here: the Substitution/Appender decorators supply it.
    @Substitution(klass="TimedeltaIndex")
    @Appender(_shared_docs["searchsorted"])
    def searchsorted(self, value, side="left", sorter=None):
        if isinstance(value, (np.ndarray, Index)):
            # Array-likes must have a dtype the backing array recognizes;
            # they are then wrapped in the same array type as self._data.
            if not type(self._data)._is_recognized_dtype(value):
                raise TypeError(
                    "searchsorted requires compatible dtype or scalar, "
                    f"not {type(value).__name__}"
                )
            value = type(self._data)(value)
            self._data._check_compatible_with(value)

        elif isinstance(value, self._data._recognized_scalars):
            self._data._check_compatible_with(value)
            value = self._data._scalar_type(value)

        elif not isinstance(value, TimedeltaArray):
            raise TypeError(
                "searchsorted requires compatible dtype or scalar, "
                f"not {type(value).__name__}"
            )

        return self._data.searchsorted(value, side=side, sorter=sorter)

    def is_type_compatible(self, typ) -> bool:
        """
        Return True if ``typ`` equals ``self.inferred_type`` or "timedelta".
        """
        return typ == self.inferred_type or typ == "timedelta"

    @property
    def inferred_type(self) -> str:
        """
        Always the string "timedelta64".
        """
        return "timedelta64"

    def insert(self, loc, item):
        """
        Make new Index inserting new item at location

        Parameters
        ----------
        loc : int
        item : object
            A recognized timedelta scalar or a valid NaT-like value is
            inserted as a timedelta value. A string that cannot be inserted
            that way produces an object-dtype Index instead. Any other
            incompatible label raises.

        Returns
        -------
        new_index : Index

        Raises
        ------
        TypeError
            If ``item`` is an incompatible label, e.g. ``datetime64("NaT")``.
        """
        # try to convert if possible
        if isinstance(item, self._data._recognized_scalars):
            item = self._data._scalar_type(item)
        elif is_valid_nat_for_dtype(item, self.dtype):
            # GH 18295
            item = self._na_value
        elif is_scalar(item) and isna(item):
            # i.e. datetime64("NaT")
            raise TypeError(
                f"cannot insert {type(self).__name__} with incompatible label"
            )

        freq = None
        if isinstance(item, self._data._scalar_type) or item is NaT:
            self._data._check_compatible_with(item, setitem=True)

            # check freq can be preserved on edge cases
            if self.size and self.freq is not None:
                if item is NaT:
                    pass
                elif (loc == 0 or loc == -len(self)) and item + self.freq == self[0]:
                    freq = self.freq
                elif (loc == len(self)) and item - self.freq == self[-1]:
                    freq = self.freq
            item = item.asm8

        try:
            new_i8s = np.concatenate(
                (self[:loc].asi8, [item.view(np.int64)], self[loc:].asi8)
            )
            return self._shallow_copy(new_i8s, freq=freq)
        except (AttributeError, TypeError) as err:

            # fall back to object index
            if isinstance(item, str):
                return self.astype(object).insert(loc, item)
            raise TypeError(
                f"cannot insert {type(self).__name__} with incompatible label"
            ) from err


TimedeltaIndex._add_logical_methods_disabled()

434 

435 

def timedelta_range(
    start=None, end=None, periods=None, freq=None, name=None, closed=None
) -> TimedeltaIndex:
    """
    Build a fixed-frequency TimedeltaIndex; the frequency defaults to one day.

    Parameters
    ----------
    start : str or timedelta-like, default None
        Left bound of the generated timedeltas.
    end : str or timedelta-like, default None
        Right bound of the generated timedeltas.
    periods : int, default None
        How many periods to generate.
    freq : str or DateOffset, default 'D'
        Frequency strings can have multiples, e.g. '5H'.
    name : str, default None
        Name given to the resulting TimedeltaIndex.
    closed : str, default None
        Make the interval closed with respect to the given frequency to
        the 'left', 'right', or both sides (None).

    Returns
    -------
    rng : TimedeltaIndex

    Notes
    -----
    Exactly three of the four parameters ``start``, ``end``, ``periods`` and
    ``freq`` must be given. When ``freq`` is left out, the result holds
    ``periods`` linearly spaced elements between ``start`` and ``end``
    (closed on both sides).

    To learn more about the frequency strings, please see `this link
    <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__.

    Examples
    --------

    >>> pd.timedelta_range(start='1 day', periods=4)
    TimedeltaIndex(['1 days', '2 days', '3 days', '4 days'],
                   dtype='timedelta64[ns]', freq='D')

    ``closed`` selects which endpoint is included; by default both endpoints
    are included.

    >>> pd.timedelta_range(start='1 day', periods=4, closed='right')
    TimedeltaIndex(['2 days', '3 days', '4 days'],
                   dtype='timedelta64[ns]', freq='D')

    ``freq`` sets the frequency of the TimedeltaIndex. Only fixed frequencies
    are accepted; non-fixed frequencies such as 'M' (month end) will raise.

    >>> pd.timedelta_range(start='1 day', end='2 days', freq='6H')
    TimedeltaIndex(['1 days 00:00:00', '1 days 06:00:00', '1 days 12:00:00',
                    '1 days 18:00:00', '2 days 00:00:00'],
                   dtype='timedelta64[ns]', freq='6H')

    Give ``start``, ``end`` and ``periods`` to have the spacing computed
    automatically (linearly spaced).

    >>> pd.timedelta_range(start='1 day', end='5 days', periods=4)
    TimedeltaIndex(['1 days 00:00:00', '2 days 08:00:00', '3 days 16:00:00',
                    '5 days 00:00:00'],
                   dtype='timedelta64[ns]', freq=None)
    """
    # Daily frequency is only assumed when at least one of the other three
    # range parameters is missing.
    use_daily_default = freq is None and com.any_none(periods, start, end)
    if use_daily_default:
        freq = "D"

    # Only the first element of the returned pair is needed here.
    freq, _ = dtl.maybe_infer_freq(freq)
    generated = TimedeltaArray._generate_range(
        start, end, periods, freq, closed=closed
    )
    return TimedeltaIndex._simple_new(
        generated._data, freq=generated.freq, name=name
    )