Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1from datetime import timedelta 

2import operator 

3from typing import Any, Callable, List, Optional, Sequence, Union 

4 

5import numpy as np 

6 

7from pandas._libs.tslibs import ( 

8 NaT, 

9 NaTType, 

10 frequencies as libfrequencies, 

11 iNaT, 

12 period as libperiod, 

13) 

14from pandas._libs.tslibs.fields import isleapyear_arr 

15from pandas._libs.tslibs.period import ( 

16 DIFFERENT_FREQ, 

17 IncompatibleFrequency, 

18 Period, 

19 get_period_field_arr, 

20 period_asfreq_arr, 

21) 

22from pandas._libs.tslibs.timedeltas import Timedelta, delta_to_nanoseconds 

23from pandas.util._decorators import cache_readonly 

24 

25from pandas.core.dtypes.common import ( 

26 _TD_DTYPE, 

27 ensure_object, 

28 is_datetime64_dtype, 

29 is_float_dtype, 

30 is_period_dtype, 

31 pandas_dtype, 

32) 

33from pandas.core.dtypes.dtypes import PeriodDtype 

34from pandas.core.dtypes.generic import ( 

35 ABCIndexClass, 

36 ABCPeriodArray, 

37 ABCPeriodIndex, 

38 ABCSeries, 

39) 

40from pandas.core.dtypes.missing import isna, notna 

41 

42import pandas.core.algorithms as algos 

43from pandas.core.arrays import datetimelike as dtl 

44import pandas.core.common as com 

45 

46from pandas.tseries import frequencies 

47from pandas.tseries.offsets import DateOffset, Tick, _delta_to_tick 

48 

49 

50def _field_accessor(name, alias, docstring=None): 

51 def f(self): 

52 base, mult = libfrequencies.get_freq_code(self.freq) 

53 result = get_period_field_arr(alias, self.asi8, base) 

54 return result 

55 

56 f.__name__ = name 

57 f.__doc__ = docstring 

58 return property(f) 

59 

60 

class PeriodArray(dtl.DatetimeLikeArrayMixin, dtl.DatelikeOps):
    """
    Pandas ExtensionArray for storing Period data.

    Users should use :func:`period_array` to create new instances.

    Parameters
    ----------
    values : Union[PeriodArray, Series[period], ndarray[int], PeriodIndex]
        The data to store. These should be arrays that can be directly
        converted to ordinals without inference or copy (PeriodArray,
        ndarray[int64]), or a box around such an array (Series[period],
        PeriodIndex).
    freq : str or DateOffset
        The `freq` to use for the array. Mostly applicable when `values`
        is an ndarray of integers, when `freq` is required. When `values`
        is a PeriodArray (or box around), it's checked that ``values.freq``
        matches `freq`.
    dtype : PeriodDtype, optional
        A PeriodDtype instance from which to extract a `freq`. If both
        `freq` and `dtype` are specified, then the frequencies must match.
    copy : bool, default False
        Whether to copy the ordinals before storing.

    Attributes
    ----------
    None

    Methods
    -------
    None

    Raises
    ------
    TypeError
        If `values` is a Series whose underlying values are not a
        PeriodArray.
    ValueError
        If no frequency is given and none can be taken from `values`.

    See Also
    --------
    period_array : Create a new PeriodArray.
    PeriodIndex : Immutable Index for period data.

    Notes
    -----
    There are two components to a PeriodArray

    - ordinals : integer ndarray
    - freq : pd.tseries.offsets.Offset

    The values are physically stored as a 1-D ndarray of integers. These are
    called "ordinals" and represent some kind of offset from a base.

    The `freq` indicates the span covered by each element of the array.
    All elements in the PeriodArray have the same `freq`.
    """

    # array priority higher than numpy scalars
    __array_priority__ = 1000
    _typ = "periodarray"  # ABCPeriodArray
    _scalar_type = Period
    _recognized_scalars = (Period,)
    _is_recognized_dtype = is_period_dtype

    # Names others delegate to us
    _other_ops: List[str] = []
    _bool_ops = ["is_leap_year"]
    _object_ops = ["start_time", "end_time", "freq"]
    _field_ops = [
        "year",
        "month",
        "day",
        "hour",
        "minute",
        "second",
        "weekofyear",
        "weekday",
        "week",
        "dayofweek",
        "dayofyear",
        "quarter",
        "qyear",
        "days_in_month",
        "daysinmonth",
    ]
    _datetimelike_ops = _field_ops + _object_ops + _bool_ops
    _datetimelike_methods = ["strftime", "to_timestamp", "asfreq"]

    # --------------------------------------------------------------------
    # Constructors

    def __init__(self, values, freq=None, dtype=None, copy=False):
        # Reconcile `dtype` and `freq`; afterwards `freq` is the single
        # source of truth (or None if neither was given).
        freq = validate_dtype_freq(dtype, freq)

        if freq is not None:
            freq = Period._maybe_convert_freq(freq)

        # Unwrap Series / PeriodIndex boxes down to their underlying values.
        if isinstance(values, ABCSeries):
            values = values._values
            if not isinstance(values, type(self)):
                raise TypeError("Incorrect dtype")

        elif isinstance(values, ABCPeriodIndex):
            values = values._values

        if isinstance(values, type(self)):
            # An explicit freq must agree with the freq the input carries.
            if freq is not None and freq != values.freq:
                raise raise_on_incompatible(values, freq)
            values, freq = values._data, values.freq

        values = np.array(values, dtype="int64", copy=copy)
        self._data = values
        if freq is None:
            raise ValueError("freq is not specified and cannot be inferred")
        self._dtype = PeriodDtype(freq)

    @classmethod
    def _simple_new(cls, values, freq=None, **kwargs):
        """
        Construct an instance by forwarding all arguments to ``cls(...)``.
        """
        # alias for PeriodArray.__init__
        return cls(values, freq=freq, **kwargs)

    @classmethod
    def _from_sequence(
        cls,
        scalars: Sequence[Optional[Period]],
        dtype: Optional[PeriodDtype] = None,
        copy: bool = False,
    ) -> ABCPeriodArray:
        """
        Construct a PeriodArray from a sequence of scalars.

        Parameters
        ----------
        scalars : sequence
            If already an instance of `cls`, it is checked against `dtype`
            and returned (copied first when `copy` is True).
        dtype : PeriodDtype, optional
            When given, its ``freq`` is used instead of extracting one from
            the data.
        copy : bool, default False

        Returns
        -------
        PeriodArray
        """
        if dtype:
            freq = dtype.freq
        else:
            freq = None

        if isinstance(scalars, cls):
            # Called for its validation side effect only; the return value
            # is intentionally discarded.
            validate_dtype_freq(scalars.dtype, freq)
            if copy:
                scalars = scalars.copy()
            return scalars

        periods = np.asarray(scalars, dtype=object)
        if copy:
            periods = periods.copy()

        # Fall back to extracting the freq from the data when none was given.
        freq = freq or libperiod.extract_freq(periods)
        ordinals = libperiod.extract_ordinals(periods, freq)
        return cls(ordinals, freq=freq)

    @classmethod
    def _from_sequence_of_strings(cls, strings, dtype=None, copy=False):
        """
        Construct a PeriodArray from strings by delegating to
        ``_from_sequence`` with the same arguments.
        """
        return cls._from_sequence(strings, dtype, copy)

    @classmethod
    def _from_datetime64(cls, data, freq, tz=None):
        """
        Construct a PeriodArray from a datetime64 array

        Parameters
        ----------
        data : ndarray[datetime64[ns], datetime64[ns, tz]]
        freq : str or Tick
        tz : tzinfo, optional

        Returns
        -------
        PeriodArray[freq]
        """
        data, freq = dt64arr_to_periodarr(data, freq, tz)
        return cls(data, freq=freq)

    @classmethod
    def _generate_range(cls, start, end, periods, freq, fields):
        """
        Compute ordinals and frequency for a period range.

        The range is built either from endpoints (`start` / `end` /
        `periods`) or from `fields`, never both.

        Returns
        -------
        subarr : ndarray[int64]
        freq : frequency of the generated ordinals

        Raises
        ------
        ValueError
            If both endpoints and fields are supplied, or neither is.
        """
        periods = dtl.validate_periods(periods)

        if freq is not None:
            freq = Period._maybe_convert_freq(freq)

        field_count = len(fields)
        if start is not None or end is not None:
            if field_count > 0:
                raise ValueError(
                    "Can either instantiate from fields or endpoints, but not both"
                )
            subarr, freq = _get_ordinal_range(start, end, periods, freq)
        elif field_count > 0:
            subarr, freq = _range_from_fields(freq=freq, **fields)
        else:
            raise ValueError("Not enough parameters to construct Period range")

        return subarr, freq

    # -----------------------------------------------------------------
    # DatetimeLike Interface

    def _unbox_scalar(self, value: Union[Period, NaTType]) -> int:
        """
        Return the integer representation of `value`.

        NaT yields ``NaT.value``; a Period yields its ``ordinal`` after its
        frequency has been checked against this array. Anything else raises
        ValueError.
        """
        if value is NaT:
            return value.value
        elif isinstance(value, self._scalar_type):
            if not isna(value):
                self._check_compatible_with(value)
            return value.ordinal
        else:
            raise ValueError(f"'value' should be a Period. Got '{value}' instead.")

    def _scalar_from_string(self, value: str) -> Period:
        """
        Parse `value` into a Period using this array's frequency.
        """
        return Period(value, freq=self.freq)

    def _check_compatible_with(self, other, setitem: bool = False):
        """
        Raise IncompatibleFrequency if `other` has a different ``freqstr``.

        NaT is always accepted. `setitem` is not used by this implementation.
        """
        if other is NaT:
            return
        if self.freqstr != other.freqstr:
            raise raise_on_incompatible(self, other)

    # --------------------------------------------------------------------
    # Data / Attributes

    @cache_readonly
    def dtype(self):
        """
        The PeriodDtype created in ``__init__`` from the array's frequency.
        """
        return self._dtype

    # error: Read-only property cannot override read-write property  [misc]
    @property  # type: ignore
    def freq(self):
        """
        Return the frequency object for this PeriodArray.
        """
        return self.dtype.freq

    def __array__(self, dtype=None) -> np.ndarray:
        """
        Return an object-dtype ndarray of the elements.

        The `dtype` argument is accepted but not used here.
        """
        # overriding DatetimelikeArray
        return np.array(list(self), dtype=object)

    def __arrow_array__(self, type=None):
        """
        Convert myself into a pyarrow Array.
        """
        import pyarrow
        from pandas.core.arrays._arrow_utils import ArrowPeriodType

        if type is not None:
            if pyarrow.types.is_integer(type):
                # Integer target: export the raw ordinals with a null mask.
                return pyarrow.array(self._data, mask=self.isna(), type=type)
            elif isinstance(type, ArrowPeriodType):
                # ensure we have the same freq
                if self.freqstr != type.freq:
                    raise TypeError(
                        "Not supported to convert PeriodArray to array with different"
                        " 'freq' ({0} vs {1})".format(self.freqstr, type.freq)
                    )
            else:
                raise TypeError(
                    "Not supported to convert PeriodArray to '{0}' type".format(type)
                )

        # Default (or matching ArrowPeriodType): int64 storage wrapped in the
        # period extension type.
        period_type = ArrowPeriodType(self.freqstr)
        storage_array = pyarrow.array(self._data, mask=self.isna(), type="int64")
        return pyarrow.ExtensionArray.from_storage(period_type, storage_array)

    # --------------------------------------------------------------------
    # Vectorized analogues of Period properties

    # Each accessor below passes an integer field code as the second
    # argument; it is forwarded to ``get_period_field_arr``.
    year = _field_accessor(
        "year",
        0,
        """
        The year of the period.
        """,
    )
    month = _field_accessor(
        "month",
        3,
        """
        The month as January=1, December=12.
        """,
    )
    day = _field_accessor(
        "day",
        4,
        """
        The days of the period.
        """,
    )
    hour = _field_accessor(
        "hour",
        5,
        """
        The hour of the period.
        """,
    )
    minute = _field_accessor(
        "minute",
        6,
        """
        The minute of the period.
        """,
    )
    second = _field_accessor(
        "second",
        7,
        """
        The second of the period.
        """,
    )
    weekofyear = _field_accessor(
        "week",
        8,
        """
        The week ordinal of the year.
        """,
    )
    week = weekofyear
    dayofweek = _field_accessor(
        "dayofweek",
        10,
        """
        The day of the week with Monday=0, Sunday=6.
        """,
    )
    weekday = dayofweek
    dayofyear = day_of_year = _field_accessor(
        "dayofyear",
        9,
        """
        The ordinal day of the year.
        """,
    )
    quarter = _field_accessor(
        "quarter",
        2,
        """
        The quarter of the date.
        """,
    )
    # NOTE(review): qyear is the only accessor created without a docstring.
    qyear = _field_accessor("qyear", 1)
    days_in_month = _field_accessor(
        "days_in_month",
        11,
        """
        The number of days in the month.
        """,
    )
    daysinmonth = days_in_month

    @property
    def is_leap_year(self):
        """
        Logical indicating if the date belongs to a leap year.
        """
        return isleapyear_arr(np.asarray(self.year))

    @property
    def start_time(self):
        """
        Equivalent to ``self.to_timestamp(how="start")``.
        """
        return self.to_timestamp(how="start")

    @property
    def end_time(self):
        """
        Equivalent to ``self.to_timestamp(how="end")``.
        """
        return self.to_timestamp(how="end")

    def to_timestamp(self, freq=None, how="start"):
        """
        Cast to DatetimeArray/Index.

        Parameters
        ----------
        freq : str or DateOffset, optional
            Target frequency. The default is 'D' for week or longer,
            'S' otherwise.
        how : {'s', 'e', 'start', 'end'}
            Whether to use the start or end of the time period being converted.

        Returns
        -------
        DatetimeArray/Index
        """
        from pandas.core.arrays import DatetimeArray

        how = libperiod._validate_end_alias(how)

        end = how == "E"
        if end:
            # The end timestamp is derived from a start timestamp: either the
            # start of this period plus (1 day - 1ns), or the start of the
            # next period minus 1ns.
            # NOTE(review): both recursive calls omit `freq`, so a caller's
            # `freq` is not forwarded when how="end"; and the "B" test looks
            # at the `freq` argument, not at self.freq — confirm intended.
            if freq == "B":
                # roll forward to ensure we land on B date
                adjust = Timedelta(1, "D") - Timedelta(1, "ns")
                return self.to_timestamp(how="start") + adjust
            else:
                adjust = Timedelta(1, "ns")
                return (self + self.freq).to_timestamp(how="start") - adjust

        if freq is None:
            base, mult = libfrequencies.get_freq_code(self.freq)
            freq = libfrequencies.get_to_timestamp_base(base)
        else:
            freq = Period._maybe_convert_freq(freq)

        base, mult = libfrequencies.get_freq_code(freq)
        new_data = self.asfreq(freq, how=how)

        new_data = libperiod.periodarr_to_dt64arr(new_data.asi8, base)
        return DatetimeArray._from_sequence(new_data, freq="infer")

    # --------------------------------------------------------------------
    # Array-like / EA-Interface Methods

    def _values_for_argsort(self):
        """
        Return the underlying int64 ordinals (``self._data``).
        """
        return self._data

    # --------------------------------------------------------------------

    def _time_shift(self, periods, freq=None):
        """
        Shift each value by `periods`.

        Note this is different from ExtensionArray.shift, which
        shifts the *position* of each element, padding the end with
        missing values.

        Parameters
        ----------
        periods : int
            Number of periods to shift by.
        freq : pandas.DateOffset, pandas.Timedelta, or str
            Frequency increment to shift by.

        Raises
        ------
        TypeError
            If `freq` is not None; it is not supported here.
        """
        if freq is not None:
            raise TypeError(
                "`freq` argument is not supported for "
                f"{type(self).__name__}._time_shift"
            )
        values = self.asi8 + periods * self.freq.n
        if self._hasnans:
            # The integer addition also shifted the NaT sentinel; restore it.
            values[self._isnan] = iNaT
        return type(self)(values, freq=self.freq)

    @property
    def _box_func(self):
        """
        Callable turning a single ordinal into a Period with this array's
        frequency.
        """
        return lambda x: Period._from_ordinal(ordinal=x, freq=self.freq)

    def asfreq(self, freq=None, how="E"):
        """
        Convert the Period Array/Index to the specified frequency `freq`.

        Parameters
        ----------
        freq : str
            A frequency.
        how : str {'E', 'S'}
            Whether the elements should be aligned to the end
            or start within a period.

            * 'E', 'END', or 'FINISH' for end,
            * 'S', 'START', or 'BEGIN' for start.

            January 31st ('END') vs. January 1st ('START') for example.

        Returns
        -------
        Period Array/Index
            Constructed with the new frequency.

        Examples
        --------
        >>> pidx = pd.period_range('2010-01-01', '2015-01-01', freq='A')
        >>> pidx
        PeriodIndex(['2010', '2011', '2012', '2013', '2014', '2015'],
        dtype='period[A-DEC]', freq='A-DEC')

        >>> pidx.asfreq('M')
        PeriodIndex(['2010-12', '2011-12', '2012-12', '2013-12', '2014-12',
        '2015-12'], dtype='period[M]', freq='M')

        >>> pidx.asfreq('M', how='S')
        PeriodIndex(['2010-01', '2011-01', '2012-01', '2013-01', '2014-01',
        '2015-01'], dtype='period[M]', freq='M')
        """
        how = libperiod._validate_end_alias(how)

        freq = Period._maybe_convert_freq(freq)

        base1, mult1 = libfrequencies.get_freq_code(self.freq)
        base2, mult2 = libfrequencies.get_freq_code(freq)

        asi8 = self.asi8
        # mult1 can't be negative or 0
        end = how == "E"
        if end:
            # Advance by (mult1 - 1) ordinals before converting when
            # aligning to the end.
            ordinal = asi8 + mult1 - 1
        else:
            ordinal = asi8

        new_data = period_asfreq_arr(ordinal, base1, base2, end)

        if self._hasnans:
            # Re-apply the NaT sentinel at the originally-missing positions.
            new_data[self._isnan] = iNaT

        return type(self)(new_data, freq=freq)

    # ------------------------------------------------------------------
    # Rendering Methods

    def _formatter(self, boxed=False):
        """
        Return the element formatter: ``str`` when `boxed`, otherwise a
        callable that wraps the value in single quotes.
        """
        if boxed:
            return str
        return "'{}'".format

    def _format_native_types(self, na_rep="NaT", date_format=None, **kwargs):
        """
        actually format my specific types
        """
        values = self.astype(object)

        if date_format:
            formatter = lambda dt: dt.strftime(date_format)
        else:
            formatter = lambda dt: str(dt)

        if self._hasnans:
            # Missing entries get `na_rep`; only the rest are formatted.
            mask = self._isnan
            values[mask] = na_rep
            imask = ~mask
            values[imask] = np.array([formatter(dt) for dt in values[imask]])
        else:
            values = np.array([formatter(dt) for dt in values])
        return values

    # ------------------------------------------------------------------

    def astype(self, dtype, copy=True):
        """
        Cast to `dtype`.

        A period dtype is handled here via ``asfreq`` (`copy` is not
        consulted on that path); every other dtype is delegated to the
        parent implementation.
        """
        # We handle Period[T] -> Period[U]
        # Our parent handles everything else.
        dtype = pandas_dtype(dtype)

        if is_period_dtype(dtype):
            return self.asfreq(dtype.freq)
        return super().astype(dtype, copy=copy)

    # ------------------------------------------------------------------
    # Arithmetic Methods

    def _sub_datelike(self, other):
        """
        Subtracting a datetime-like is not implemented here; returns
        ``NotImplemented``.
        """
        assert other is not NaT
        return NotImplemented

    def _sub_period(self, other):
        """
        Subtract a Period scalar, returning an object ndarray holding
        ``self.freq * n`` for each ordinal difference ``n`` (NaT where this
        array is missing).
        """
        # If the operation is well-defined, we return an object-Index
        # of DateOffsets. Null entries are filled with pd.NaT
        self._check_compatible_with(other)
        asi8 = self.asi8
        new_data = asi8 - other.ordinal
        new_data = np.array([self.freq * x for x in new_data])

        if self._hasnans:
            new_data[self._isnan] = NaT

        return new_data

    def _addsub_int_array(
        self, other: np.ndarray, op: Callable[[Any, Any], Any],
    ) -> "PeriodArray":
        """
        Add or subtract array of integers; equivalent to applying
        `_time_shift` pointwise.

        Parameters
        ----------
        other : np.ndarray[integer-dtype]
        op : {operator.add, operator.sub}

        Returns
        -------
        result : PeriodArray
        """

        assert op in [operator.add, operator.sub]
        if op is operator.sub:
            # Subtraction is performed as addition of the negated operand.
            other = -other
        res_values = algos.checked_add_with_arr(self.asi8, other, arr_mask=self._isnan)
        res_values = res_values.view("i8")
        res_values[self._isnan] = iNaT
        return type(self)(res_values, freq=self.freq)

    def _add_offset(self, other):
        """
        Add a non-Tick DateOffset whose base alias matches this array's
        ``freq.rule_code``; raises IncompatibleFrequency otherwise.
        """
        assert not isinstance(other, Tick)
        base = libfrequencies.get_base_alias(other.rule_code)
        if base != self.freq.rule_code:
            raise raise_on_incompatible(self, other)

        # Note: when calling parent class's _add_timedeltalike_scalar,
        #  it will call delta_to_nanoseconds(delta).  Because delta here
        #  is an integer, delta_to_nanoseconds will return it unchanged.
        result = super()._add_timedeltalike_scalar(other.n)
        return type(self)(result, freq=self.freq)

    def _add_timedeltalike_scalar(self, other):
        """
        Parameters
        ----------
        other : timedelta, Tick, np.timedelta64

        Returns
        -------
        result : ndarray[int64]
        """
        assert isinstance(self.freq, Tick)  # checked by calling function
        assert isinstance(other, (timedelta, np.timedelta64, Tick))

        if notna(other):
            # special handling for np.timedelta64("NaT"), avoid calling
            #  _check_timedeltalike_freq_compat as that would raise TypeError
            other = self._check_timedeltalike_freq_compat(other)

        # Note: when calling parent class's _add_timedeltalike_scalar,
        #  it will call delta_to_nanoseconds(delta).  Because delta here
        #  is an integer, delta_to_nanoseconds will return it unchanged.
        ordinals = super()._add_timedeltalike_scalar(other)
        return ordinals

    def _add_delta_tdi(self, other):
        """
        Parameters
        ----------
        other : TimedeltaArray or ndarray[timedelta64]

        Returns
        -------
        result : ndarray[int64]
        """
        assert isinstance(self.freq, Tick)  # checked by calling function

        if not np.all(isna(other)):
            delta = self._check_timedeltalike_freq_compat(other)
        else:
            # all-NaT TimedeltaIndex is equivalent to a single scalar td64 NaT
            return self + np.timedelta64("NaT")

        return self._addsub_int_array(delta, operator.add).asi8

    def _add_delta(self, other):
        """
        Add a timedelta-like, Tick, or TimedeltaIndex-like object
        to self, yielding a new PeriodArray

        Parameters
        ----------
        other : {timedelta, np.timedelta64, Tick,
                 TimedeltaIndex, ndarray[timedelta64]}

        Returns
        -------
        result : PeriodArray
        """
        if not isinstance(self.freq, Tick):
            # We cannot add timedelta-like to non-tick PeriodArray
            raise raise_on_incompatible(self, other)

        new_ordinals = super()._add_delta(other)
        return type(self)(new_ordinals, freq=self.freq)

    def _check_timedeltalike_freq_compat(self, other):
        """
        Arithmetic operations with timedelta-like scalars or array `other`
        are only valid if `other` is an integer multiple of `self.freq`.
        If the operation is valid, find that integer multiple.  Otherwise,
        raise because the operation is invalid.

        Parameters
        ----------
        other : timedelta, np.timedelta64, Tick,
                ndarray[timedelta64], TimedeltaArray, TimedeltaIndex

        Returns
        -------
        multiple : int or ndarray[int64]

        Raises
        ------
        IncompatibleFrequency
        """
        assert isinstance(self.freq, Tick)  # checked by calling function
        # The offset is rebuilt from rule_code alone, so base_nanos is
        # computed from that rather than from self.freq directly.
        own_offset = frequencies.to_offset(self.freq.rule_code)
        base_nanos = delta_to_nanoseconds(own_offset)

        if isinstance(other, (timedelta, np.timedelta64, Tick)):
            nanos = delta_to_nanoseconds(other)

        elif isinstance(other, np.ndarray):
            # numpy timedelta64 array; all entries must be compatible
            assert other.dtype.kind == "m"
            if other.dtype != _TD_DTYPE:
                # i.e. non-nano unit
                # TODO: disallow unit-less timedelta64
                other = other.astype(_TD_DTYPE)
            nanos = other.view("i8")
        else:
            # TimedeltaArray/Index
            nanos = other.asi8

        if np.all(nanos % base_nanos == 0):
            # nanos being added is an integer multiple of the
            #  base-frequency to self.freq
            delta = nanos // base_nanos
            # delta is the integer (or integer-array) number of periods
            # by which will be added to self.
            return delta

        raise raise_on_incompatible(self, other)

759 

760 

761def raise_on_incompatible(left, right): 

762 """ 

763 Helper function to render a consistent error message when raising 

764 IncompatibleFrequency. 

765 

766 Parameters 

767 ---------- 

768 left : PeriodArray 

769 right : None, DateOffset, Period, ndarray, or timedelta-like 

770 

771 Returns 

772 ------- 

773 IncompatibleFrequency 

774 Exception to be raised by the caller. 

775 """ 

776 # GH#24283 error message format depends on whether right is scalar 

777 if isinstance(right, np.ndarray) or right is None: 

778 other_freq = None 

779 elif isinstance(right, (ABCPeriodIndex, PeriodArray, Period, DateOffset)): 

780 other_freq = right.freqstr 

781 else: 

782 other_freq = _delta_to_tick(Timedelta(right)).freqstr 

783 

784 msg = DIFFERENT_FREQ.format( 

785 cls=type(left).__name__, own_freq=left.freqstr, other_freq=other_freq 

786 ) 

787 return IncompatibleFrequency(msg) 

788 

789 

790# ------------------------------------------------------------------- 

791# Constructor Helpers 

792 

793 

def period_array(
    data: Sequence[Optional[Period]],
    freq: Optional[Union[str, Tick]] = None,
    copy: bool = False,
) -> PeriodArray:
    """
    Construct a new PeriodArray from a sequence of Period scalars.

    Parameters
    ----------
    data : Sequence of Period objects
        A sequence of Period objects. These are required to all have
        the same ``freq.`` Missing values can be indicated by ``None``
        or ``pandas.NaT``.
    freq : str, Tick, or Offset
        The frequency of every element of the array. This can be specified
        to avoid inferring the `freq` from `data`.
    copy : bool, default False
        Whether to ensure a copy of the data is made. This matters when
        `data` is already period-backed (PeriodArray, PeriodIndex or
        Series): with ``copy=False`` the ordinals are handed to the
        ``PeriodArray`` constructor without requesting a copy.

    Returns
    -------
    PeriodArray

    Raises
    ------
    TypeError
        If `data` is a non-empty floating point array.

    See Also
    --------
    PeriodArray
    pandas.PeriodIndex

    Examples
    --------
    >>> period_array([pd.Period('2017', freq='A'),
    ...               pd.Period('2018', freq='A')])
    <PeriodArray>
    ['2017', '2018']
    Length: 2, dtype: period[A-DEC]

    >>> period_array([pd.Period('2017', freq='A'),
    ...               pd.Period('2018', freq='A'),
    ...               pd.NaT])
    <PeriodArray>
    ['2017', '2018', 'NaT']
    Length: 3, dtype: period[A-DEC]

    Integers that look like years are handled

    >>> period_array([2000, 2001, 2002], freq='D')
    <PeriodArray>
    ['2000-01-01', '2001-01-01', '2002-01-01']
    Length: 3, dtype: period[D]

    Datetime-like strings may also be passed

    >>> period_array(['2000-Q1', '2000-Q2', '2000-Q3', '2000-Q4'], freq='Q')
    <PeriodArray>
    ['2000Q1', '2000Q2', '2000Q3', '2000Q4']
    Length: 4, dtype: period[Q-DEC]
    """
    if is_datetime64_dtype(data):
        return PeriodArray._from_datetime64(data, freq)
    if isinstance(data, (ABCPeriodIndex, ABCSeries, PeriodArray)):
        # Forward `copy`: previously it was silently ignored, so the result
        # aliased the input's ordinals even when copy=True was requested.
        return PeriodArray(data, freq, copy=copy)

    # other iterable of some kind
    if not isinstance(data, (np.ndarray, list, tuple)):
        data = list(data)

    data = np.asarray(data)

    dtype: Optional[PeriodDtype]
    if freq:
        dtype = PeriodDtype(freq)
    else:
        dtype = None

    if is_float_dtype(data) and len(data) > 0:
        raise TypeError("PeriodIndex does not allow floating point in construction")

    data = ensure_object(data)

    return PeriodArray._from_sequence(data, dtype=dtype)

874 

875 

def validate_dtype_freq(dtype, freq):
    """
    Reconcile an optional period dtype with an optional frequency.

    When both are supplied they must agree; when only `dtype` is supplied
    its frequency is returned.

    Parameters
    ----------
    dtype : dtype
    freq : DateOffset or None

    Returns
    -------
    freq : DateOffset
        None when neither `dtype` nor `freq` was given.

    Raises
    ------
    ValueError : non-period dtype
    IncompatibleFrequency : mismatch between dtype and freq
    """
    if freq is not None:
        freq = frequencies.to_offset(freq)

    # Nothing to cross-check without a dtype.
    if dtype is None:
        return freq

    dtype = pandas_dtype(dtype)
    if not is_period_dtype(dtype):
        raise ValueError("dtype must be PeriodDtype")

    if freq is None:
        return dtype.freq
    if freq != dtype.freq:
        raise IncompatibleFrequency("specified freq and dtype are different")
    return freq

907 

908 

def dt64arr_to_periodarr(data, freq, tz=None):
    """
    Convert a datetime64[ns] array to Period ordinals.

    Parameters
    ----------
    data : Union[Series[datetime64[ns]], DatetimeIndex, ndarray[datetime64ns]]
    freq : Optional[Union[str, Tick]]
        Must match the `freq` on the `data` if `data` is a DatetimeIndex
        or Series.
    tz : Optional[tzinfo]

    Returns
    -------
    ordinals : ndarray[int]
    freq : Tick
        The frequency extracted from the Series or DatetimeIndex if that's
        used.

    Raises
    ------
    ValueError
        If ``data.dtype`` is not ``datetime64[ns]``.
    """
    expected_dtype = np.dtype("M8[ns]")
    if data.dtype != expected_dtype:
        raise ValueError(f"Wrong dtype: {data.dtype}")

    if freq is None:
        # Borrow the frequency from the container when none was supplied.
        if isinstance(data, ABCIndexClass):
            freq = data.freq
            data = data._values
        elif isinstance(data, ABCSeries):
            freq = data.dt.freq
            data = data._values

    freq = Period._maybe_convert_freq(freq)

    # A caller-supplied freq leaves an Index/Series still boxed; unwrap it.
    if isinstance(data, (ABCIndexClass, ABCSeries)):
        data = data._values

    freq_base, _ = libfrequencies.get_freq_code(freq)
    ordinals = libperiod.dt64arr_to_periodarr(data.view("i8"), freq_base, tz)
    return ordinals, freq

945 

946 

947def _get_ordinal_range(start, end, periods, freq, mult=1): 

948 if com.count_not_none(start, end, periods) != 2: 

949 raise ValueError( 

950 "Of the three parameters: start, end, and periods, " 

951 "exactly two must be specified" 

952 ) 

953 

954 if freq is not None: 

955 _, mult = libfrequencies.get_freq_code(freq) 

956 

957 if start is not None: 

958 start = Period(start, freq) 

959 if end is not None: 

960 end = Period(end, freq) 

961 

962 is_start_per = isinstance(start, Period) 

963 is_end_per = isinstance(end, Period) 

964 

965 if is_start_per and is_end_per and start.freq != end.freq: 

966 raise ValueError("start and end must have same freq") 

967 if start is NaT or end is NaT: 

968 raise ValueError("start and end must not be NaT") 

969 

970 if freq is None: 

971 if is_start_per: 

972 freq = start.freq 

973 elif is_end_per: 

974 freq = end.freq 

975 else: # pragma: no cover 

976 raise ValueError("Could not infer freq from start/end") 

977 

978 if periods is not None: 

979 periods = periods * mult 

980 if start is None: 

981 data = np.arange( 

982 end.ordinal - periods + mult, end.ordinal + 1, mult, dtype=np.int64 

983 ) 

984 else: 

985 data = np.arange( 

986 start.ordinal, start.ordinal + periods, mult, dtype=np.int64 

987 ) 

988 else: 

989 data = np.arange(start.ordinal, end.ordinal + 1, mult, dtype=np.int64) 

990 

991 return data, freq 

992 

993 

def _range_from_fields(
    year=None,
    month=None,
    quarter=None,
    day=None,
    hour=None,
    minute=None,
    second=None,
    freq=None,
):
    """
    Compute period ordinals from separate date/time field values.

    Missing `hour`, `minute` and `second` default to 0 and a missing `day`
    to 1. When `quarter` is given, ordinals are built from (year, quarter)
    pairs; otherwise from (year, month, day, hour, minute, second).

    Returns
    -------
    ordinals : ndarray[int64]
    freq
        The frequency passed in, or ``"Q"`` when `quarter` is given without
        a frequency.

    Raises
    ------
    AssertionError
        If `quarter` is given with a `freq` whose base code is not FR_QTR.
    """
    hour = 0 if hour is None else hour
    minute = 0 if minute is None else minute
    second = 0 if second is None else second
    day = 1 if day is None else day

    if quarter is not None:
        quarterly = libfrequencies.FreqGroup.FR_QTR
        if freq is None:
            freq = "Q"
            base = quarterly
        else:
            base, _ = libfrequencies.get_freq_code(freq)
            if base != quarterly:
                raise AssertionError("base must equal FR_QTR")

        years, quarters = _make_field_arrays(year, quarter)
        ordinals = []
        for yr, qtr in zip(years, quarters):
            # Translate (year, quarter) into a (year, month) pair first.
            cal_year, cal_month = libperiod.quarter_to_myear(yr, qtr, freq)
            ordinals.append(
                libperiod.period_ordinal(cal_year, cal_month, 1, 1, 1, 1, 0, 0, base)
            )
    else:
        base, _ = libfrequencies.get_freq_code(freq)
        field_arrays = _make_field_arrays(year, month, day, hour, minute, second)
        ordinals = [
            libperiod.period_ordinal(y, mth, d, h, mn, s, 0, 0, base)
            for y, mth, d, h, mn, s in zip(*field_arrays)
        ]

    return np.array(ordinals, dtype=np.int64), freq

1036 

1037 

1038def _make_field_arrays(*fields): 

1039 length = None 

1040 for x in fields: 

1041 if isinstance(x, (list, np.ndarray, ABCSeries)): 

1042 if length is not None and len(x) != length: 

1043 raise ValueError("Mismatched Period array lengths") 

1044 elif length is None: 

1045 length = len(x) 

1046 

1047 arrays = [ 

1048 np.asarray(x) 

1049 if isinstance(x, (np.ndarray, list, ABCSeries)) 

1050 else np.repeat(x, length) 

1051 for x in fields 

1052 ] 

1053 

1054 return arrays