Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1from operator import le, lt 

2import textwrap 

3 

4import numpy as np 

5 

6from pandas._config import get_option 

7 

8from pandas._libs.interval import Interval, IntervalMixin, intervals_to_interval_bounds 

9from pandas.compat.numpy import function as nv 

10from pandas.util._decorators import Appender 

11 

12from pandas.core.dtypes.cast import maybe_convert_platform 

13from pandas.core.dtypes.common import ( 

14 is_categorical_dtype, 

15 is_datetime64_any_dtype, 

16 is_float_dtype, 

17 is_integer_dtype, 

18 is_interval, 

19 is_interval_dtype, 

20 is_list_like, 

21 is_object_dtype, 

22 is_scalar, 

23 is_string_dtype, 

24 is_timedelta64_dtype, 

25 pandas_dtype, 

26) 

27from pandas.core.dtypes.dtypes import IntervalDtype 

28from pandas.core.dtypes.generic import ( 

29 ABCDatetimeIndex, 

30 ABCExtensionArray, 

31 ABCIndexClass, 

32 ABCInterval, 

33 ABCIntervalIndex, 

34 ABCPeriodIndex, 

35 ABCSeries, 

36) 

37from pandas.core.dtypes.missing import isna, notna 

38 

39from pandas.core.algorithms import take, value_counts 

40from pandas.core.arrays.base import ExtensionArray, _extension_array_shared_docs 

41from pandas.core.arrays.categorical import Categorical 

42import pandas.core.common as com 

43from pandas.core.construction import array 

44from pandas.core.indexers import check_array_indexer 

45from pandas.core.indexes.base import ensure_index 

46 

# Accepted values for the ``closed`` option; ``_validate`` and ``set_closed``
# reject anything outside this set.
_VALID_CLOSED = {"left", "right", "both", "neither"}
# Docstring templates keyed by name. Each template is filled in with
# %-formatting before it is attached through ``Appender``.
_interval_shared_docs = {}

# Substitution values for templates that only need the class naming fields.
_shared_docs_kwargs = dict(
    klass="IntervalArray", qualname="arrays.IntervalArray", name=""
)


# Class-level docstring template. Placeholders: summary, versionadded, klass,
# name, extra_attributes, extra_methods and examples.
_interval_shared_docs[
    "class"
] = """
%(summary)s

.. versionadded:: %(versionadded)s

Parameters
----------
data : array-like (1-dimensional)
    Array-like containing Interval objects from which to build the
    %(klass)s.
closed : {'left', 'right', 'both', 'neither'}, default 'right'
    Whether the intervals are closed on the left-side, right-side, both or
    neither.
dtype : dtype or None, default None
    If None, dtype will be inferred.

    .. versionadded:: 0.23.0
copy : bool, default False
    Copy the input data.
%(name)s\
verify_integrity : bool, default True
    Verify that the %(klass)s is valid.

Attributes
----------
left
right
closed
mid
length
is_empty
is_non_overlapping_monotonic
%(extra_attributes)s\

Methods
-------
from_arrays
from_tuples
from_breaks
contains
overlaps
set_closed
to_tuples
%(extra_methods)s\

See Also
--------
Index : The base pandas Index type.
Interval : A bounded slice-like interval; the elements of an %(klass)s.
interval_range : Function to create a fixed frequency IntervalIndex.
cut : Bin values into discrete Intervals.
qcut : Bin values into equal-sized Intervals based on rank or sample quantiles.

Notes
-----
See the `user guide
<https://pandas.pydata.org/pandas-docs/stable/user_guide/advanced.html#intervalindex>`_
for more.

%(examples)s\
"""

118 

119 

120@Appender( 

121 _interval_shared_docs["class"] 

122 % dict( 

123 klass="IntervalArray", 

124 summary="Pandas array for interval data that are closed on the same side.", 

125 versionadded="0.24.0", 

126 name="", 

127 extra_attributes="", 

128 extra_methods="", 

129 examples=textwrap.dedent( 

130 """\ 

131 Examples 

132 -------- 

133 A new ``IntervalArray`` can be constructed directly from an array-like of 

134 ``Interval`` objects: 

135 

136 >>> pd.arrays.IntervalArray([pd.Interval(0, 1), pd.Interval(1, 5)]) 

137 <IntervalArray> 

138 [(0, 1], (1, 5]] 

139 Length: 2, closed: right, dtype: interval[int64] 

140 

141 It may also be constructed using one of the constructor 

142 methods: :meth:`IntervalArray.from_arrays`, 

143 :meth:`IntervalArray.from_breaks`, and :meth:`IntervalArray.from_tuples`. 

144 """ 

145 ), 

146 ) 

147) 

148class IntervalArray(IntervalMixin, ExtensionArray): 

149 ndim = 1 

150 can_hold_na = True 

151 _na_value = _fill_value = np.nan 

152 

def __new__(cls, data, closed=None, dtype=None, copy=False, verify_integrity=True):
    """
    Build an instance from interval-like data.

    Left/right bounds (and, when ``closed`` is not given, the closed side)
    are taken from ``data`` and handed to ``_simple_new``.

    Raises
    ------
    TypeError
        If ``data`` is a scalar.
    """
    # an interval-dtype Series is unwrapped to its underlying values
    if isinstance(data, ABCSeries) and is_interval_dtype(data):
        data = data.values

    build_kwargs = dict(copy=copy, dtype=dtype, verify_integrity=verify_integrity)

    # already interval-like: reuse its bounds directly
    if isinstance(data, (cls, ABCIntervalIndex)):
        return cls._simple_new(
            data.left, data.right, closed or data.closed, **build_kwargs
        )

    # don't allow scalars
    if is_scalar(data):
        raise TypeError(
            f"{cls.__name__}(...) must be called with a collection "
            f"of some kind, {data} was passed"
        )

    # might need to convert empty or purely na data
    data = maybe_convert_platform_interval(data)
    left, right, inferred_closed = intervals_to_interval_bounds(
        data, validate_closed=closed is None
    )
    return cls._simple_new(left, right, closed or inferred_closed, **build_kwargs)

187 

@classmethod
def _simple_new(
    cls, left, right, closed=None, copy=False, dtype=None, verify_integrity=True
):
    """
    Assemble an instance from left and right bounds.

    Parameters
    ----------
    left, right : array-like
        Bounds; each is passed through ``ensure_index``.
    closed : str, optional
        Closed side; a falsy value is replaced by "right".
    copy : bool, default False
        Forwarded to ``ensure_index``.
    dtype : dtype, optional
        If given it must be an interval dtype; a non-None subtype is
        applied to both sides.
    verify_integrity : bool, default True
        Whether to run ``_validate`` on the result.

    Raises
    ------
    TypeError
        For a non-interval ``dtype`` or for category/object/string bounds.
    ValueError
        For differing left/right types, Period bounds, or datetime bounds
        whose time zones differ.
    """
    result = IntervalMixin.__new__(cls)

    if not closed:
        closed = "right"
    left = ensure_index(left, copy=copy)
    right = ensure_index(right, copy=copy)

    if dtype is not None:
        # GH 19262: dtype must be an IntervalDtype to override inferred
        dtype = pandas_dtype(dtype)
        if not is_interval_dtype(dtype):
            raise TypeError(f"dtype must be an IntervalDtype, got {dtype}")
        subtype = dtype.subtype
        if subtype is not None:
            left = left.astype(subtype)
            right = right.astype(subtype)

    # a float side paired with an integer side: cast the integer side
    if is_float_dtype(left) and is_integer_dtype(right):
        right = right.astype(left.dtype)
    elif is_float_dtype(right) and is_integer_dtype(left):
        left = left.astype(right.dtype)

    # each check below raises, so they are written as independent guards
    if type(left) != type(right):
        raise ValueError(
            f"must not have differing left [{type(left).__name__}] and "
            f"right [{type(right).__name__}] types"
        )
    if is_categorical_dtype(left.dtype) or is_string_dtype(left.dtype):
        # GH 19016
        raise TypeError(
            "category, object, and string subtypes are not supported "
            "for IntervalArray"
        )
    if isinstance(left, ABCPeriodIndex):
        raise ValueError("Period dtypes are not supported, use a PeriodIndex instead")
    if isinstance(left, ABCDatetimeIndex) and str(left.tz) != str(right.tz):
        raise ValueError(
            "left and right must have the same time zone, got "
            f"'{left.tz}' and '{right.tz}'"
        )

    result._left = left
    result._right = right
    result._closed = closed
    if verify_integrity:
        result._validate()
    return result

243 

@classmethod
def _from_sequence(cls, scalars, dtype=None, copy=False):
    """Build an instance from ``scalars`` by calling the class constructor."""
    new_array = cls(scalars, dtype=dtype, copy=copy)
    return new_array

247 

@classmethod
def _from_factorized(cls, values, original):
    """
    Rebuild an instance from factorized ``values``.

    The result uses the closed side of ``original``; empty ``values`` are
    first cast to the subtype of ``original``.
    """
    if not len(values):
        # An empty array returns object-dtype here. We can't create
        # a new IA from an (empty) object-dtype array, so turn it into the
        # correct dtype.
        subtype = original.dtype.subtype
        values = values.astype(subtype)
    return cls(values, closed=original.closed)

256 

# Docstring template for ``from_breaks``; ``klass`` and ``examples`` are
# substituted where the template is applied.
_interval_shared_docs["from_breaks"] = textwrap.dedent(
    """
    Construct an %(klass)s from an array of splits.

    Parameters
    ----------
    breaks : array-like (1-dimensional)
        Left and right bounds for each interval.
    closed : {'left', 'right', 'both', 'neither'}, default 'right'
        Whether the intervals are closed on the left-side, right-side, both
        or neither.
    copy : bool, default False
        Copy the data.
    dtype : dtype or None, default None
        If None, dtype will be inferred.

        .. versionadded:: 0.23.0

    Returns
    -------
    %(klass)s

    See Also
    --------
    interval_range : Function to create a fixed frequency IntervalIndex.
    %(klass)s.from_arrays : Construct from a left and right array.
    %(klass)s.from_tuples : Construct from a sequence of tuples.

    %(examples)s\
    """
)

@classmethod
@Appender(
    _interval_shared_docs["from_breaks"]
    % dict(
        klass="IntervalArray",
        examples=textwrap.dedent(
            """\
    Examples
    --------
    >>> pd.arrays.IntervalArray.from_breaks([0, 1, 2, 3])
    <IntervalArray>
    [(0, 1], (1, 2], (2, 3]]
    Length: 3, closed: right, dtype: interval[int64]
    """
        ),
    )
)
def from_breaks(cls, breaks, closed="right", copy=False, dtype=None):
    # No docstring here: the documentation is attached by ``Appender``.
    breaks = maybe_convert_platform_interval(breaks)

    # consecutive break points pair up as (left, right) bounds
    lower_edges = breaks[:-1]
    upper_edges = breaks[1:]
    return cls.from_arrays(lower_edges, upper_edges, closed, copy=copy, dtype=dtype)

310 

# Docstring template for ``from_arrays``; ``klass`` and ``examples`` are
# substituted where the template is applied.
_interval_shared_docs["from_arrays"] = textwrap.dedent(
    """
    Construct from two arrays defining the left and right bounds.

    Parameters
    ----------
    left : array-like (1-dimensional)
        Left bounds for each interval.
    right : array-like (1-dimensional)
        Right bounds for each interval.
    closed : {'left', 'right', 'both', 'neither'}, default 'right'
        Whether the intervals are closed on the left-side, right-side, both
        or neither.
    copy : bool, default False
        Copy the data.
    dtype : dtype, optional
        If None, dtype will be inferred.

        .. versionadded:: 0.23.0

    Returns
    -------
    %(klass)s

    Raises
    ------
    ValueError
        When a value is missing in only one of `left` or `right`.
        When a value in `left` is greater than the corresponding value
        in `right`.

    See Also
    --------
    interval_range : Function to create a fixed frequency IntervalIndex.
    %(klass)s.from_breaks : Construct an %(klass)s from an array of
        splits.
    %(klass)s.from_tuples : Construct an %(klass)s from an
        array-like of tuples.

    Notes
    -----
    Each element of `left` must be less than or equal to the `right`
    element at the same position. If an element is missing, it must be
    missing in both `left` and `right`. A TypeError is raised when
    using an unsupported type for `left` or `right`. At the moment,
    'category', 'object', and 'string' subtypes are not supported.

    %(examples)s\
    """
)

@classmethod
@Appender(
    _interval_shared_docs["from_arrays"]
    % dict(
        klass="IntervalArray",
        # The "Examples" header is required here: without it the example
        # is rendered as part of the "Notes" section of the template.
        examples=textwrap.dedent(
            """\
    Examples
    --------
    >>> pd.arrays.IntervalArray.from_arrays([0, 1, 2], [1, 2, 3])
    <IntervalArray>
    [(0, 1], (1, 2], (2, 3]]
    Length: 3, closed: right, dtype: interval[int64]
    """
        ),
    )
)
def from_arrays(cls, left, right, closed="right", copy=False, dtype=None):
    # No docstring here: the documentation is attached by ``Appender``.
    left = maybe_convert_platform_interval(left)
    right = maybe_convert_platform_interval(right)

    # bounds supplied by the caller are always validated
    return cls._simple_new(
        left, right, closed, copy=copy, dtype=dtype, verify_integrity=True
    )

384 

# Docstring template for ``from_tuples``; ``klass`` and ``examples`` are
# substituted where the template is applied.
_interval_shared_docs["from_tuples"] = textwrap.dedent(
    """
    Construct an %(klass)s from an array-like of tuples.

    Parameters
    ----------
    data : array-like (1-dimensional)
        Array of tuples.
    closed : {'left', 'right', 'both', 'neither'}, default 'right'
        Whether the intervals are closed on the left-side, right-side, both
        or neither.
    copy : bool, default False
        By-default copy the data, this is compat only and ignored.
    dtype : dtype or None, default None
        If None, dtype will be inferred.

        .. versionadded:: 0.23.0

    Returns
    -------
    %(klass)s

    See Also
    --------
    interval_range : Function to create a fixed frequency IntervalIndex.
    %(klass)s.from_arrays : Construct an %(klass)s from a left and
                                right array.
    %(klass)s.from_breaks : Construct an %(klass)s from an array of
                                splits.

    %(examples)s\
    """
)

@classmethod
@Appender(
    _interval_shared_docs["from_tuples"]
    % dict(
        klass="IntervalArray",
        examples=textwrap.dedent(
            """\
    Examples
    --------
    >>> pd.arrays.IntervalArray.from_tuples([(0, 1), (1, 2)])
    <IntervalArray>
    [(0, 1], (1, 2]]
    Length: 2, closed: right, dtype: interval[int64]
    """
        ),
    )
)
def from_tuples(cls, data, closed="right", copy=False, dtype=None):
    # No docstring here: the documentation is attached by ``Appender``.
    # ``copy`` is accepted for signature compatibility only and is ignored.
    if len(data):
        left, right = [], []
    else:
        # ensure that empty data keeps input dtype
        left = right = data

    # loop-invariant, only needed for the error messages
    name = cls.__name__

    for d in data:
        if isna(d):
            lhs = rhs = np.nan
        else:
            try:
                # need list of length 2 tuples, e.g. [(0, 1), (1, 2), ...]
                lhs, rhs = d
            except ValueError as err:
                # wrong number of elements to unpack
                msg = f"{name}.from_tuples requires tuples of length 2, got {d}"
                raise ValueError(msg) from err
            except TypeError as err:
                # item cannot be unpacked at all
                msg = f"{name}.from_tuples received an invalid item, {d}"
                raise TypeError(msg) from err
        left.append(lhs)
        right.append(rhs)

    return cls.from_arrays(left, right, closed, copy=False, dtype=dtype)

461 

def _validate(self):
    """
    Check the internal consistency of the IntervalArray.

    Raises
    ------
    ValueError
        If ``closed`` is not a valid option, if left and right differ in
        length, if a value is missing on only one side, or if a left bound
        is greater than the matching right bound.
    """
    if self.closed not in _VALID_CLOSED:
        raise ValueError(f"invalid option for 'closed': {self.closed}")

    if len(self.left) != len(self.right):
        raise ValueError("left and right must have the same length")

    left_present = notna(self.left)
    right_present = notna(self.right)
    if (left_present != right_present).any():
        raise ValueError(
            "missing values must be missing in the same "
            "location both left and right sides"
        )

    # only positions that hold a value take part in the ordering check
    ordered = self.left[left_present] <= self.right[left_present]
    if not ordered.all():
        raise ValueError("left side of interval must be <= right side")

489 

490 # --------- 

491 # Interface 

492 # --------- 

493 def __iter__(self): 

494 return iter(np.asarray(self)) 

495 

496 def __len__(self) -> int: 

497 return len(self.left) 

498 

def __getitem__(self, value):
    """
    Select by position.

    Returns a new array when the selection on the left bounds is still an
    index object; otherwise returns an ``Interval``, or the fill value when
    the selected left bound is a missing scalar.

    Raises
    ------
    ValueError
        If the selection has more than one dimension.
    """
    value = check_array_indexer(self, value)
    left = self.left[value]
    right = self.right[value]

    if isinstance(left, ABCIndexClass):
        return self._shallow_copy(left, right)

    # scalar
    if is_scalar(left) and isna(left):
        return self._fill_value
    if np.ndim(left) > 1:
        # GH#30588 multi-dimensional indexer disallowed
        raise ValueError("multi-dimensional indexing not allowed")
    return Interval(left, right, self.closed)

514 

def __setitem__(self, key, value):
    """
    Assign ``value`` at ``key``.

    ``value`` may be a missing scalar, an interval, or something the
    ``IntervalArray`` constructor accepts.

    Raises
    ------
    TypeError
        If ``value`` cannot be converted to intervals.
    """
    # na value: need special casing to set directly on numpy arrays
    upcast_to_float = False
    if is_scalar(value) and isna(value):
        subtype = self.dtype.subtype
        if is_integer_dtype(subtype):
            # can't set NaN on a numpy integer array
            upcast_to_float = True
        elif is_datetime64_any_dtype(subtype):
            # need proper NaT to set directly on the numpy array
            value = np.datetime64("NaT")
        elif is_timedelta64_dtype(subtype):
            # need proper NaT to set directly on the numpy array
            value = np.timedelta64("NaT")
        new_left = new_right = value

    # scalar interval
    elif is_interval_dtype(value) or isinstance(value, ABCInterval):
        self._check_closed_matches(value, name="value")
        new_left, new_right = value.left, value.right

    else:
        # list-like of intervals
        try:
            converted = IntervalArray(value)
            new_left, new_right = converted.left, converted.right
        except TypeError:
            # wrong type: not interval or NA
            raise TypeError(
                f"'value' should be an interval type, got {type(value)} instead."
            )

    key = check_array_indexer(self, key)

    # Each side is updated on a copy which is then swapped in.
    left_copy = self.left.copy(deep=True)
    if upcast_to_float:
        left_copy = left_copy.astype("float")
    left_copy.values[key] = new_left
    self._left = left_copy

    right_copy = self.right.copy(deep=True)
    if upcast_to_float:
        right_copy = right_copy.astype("float")
    right_copy.values[key] = new_right
    self._right = right_copy

559 

def __eq__(self, other):
    """
    Elementwise equality against an Interval or a list-like.

    Two elements are equal when both are intervals with the same closed
    side and the same endpoints.

    Raises
    ------
    ValueError
        If ``other`` is list-like with a different length.
    """
    # ensure pandas array for list-like and eliminate non-interval scalars
    if is_list_like(other):
        if len(self) != len(other):
            raise ValueError("Lengths must match to compare")
        other = array(other)
    elif not isinstance(other, Interval):
        # non-interval scalar -> no matches
        return np.zeros(len(self), dtype=bool)

    # determine the dtype of the elements we want to compare
    if isinstance(other, Interval):
        elem_dtype = "interval"
    elif is_categorical_dtype(other):
        # for categorical defer to categories for dtype
        elem_dtype = other.categories.dtype

        # extract intervals if we have interval categories with matching closed
        if is_interval_dtype(elem_dtype):
            if self.closed != other.categories.closed:
                return np.zeros(len(self), dtype=bool)
            other = other.categories.take(other.codes)
    else:
        elem_dtype = other.dtype

    # interval-like -> need same closed and matching endpoints
    if is_interval_dtype(elem_dtype):
        if self.closed != other.closed:
            return np.zeros(len(self), dtype=bool)
        same_left = self.left == other.left
        same_right = self.right == other.right
        return same_left & same_right

    # non-interval/non-object dtype -> no matches
    if not is_object_dtype(elem_dtype):
        return np.zeros(len(self), dtype=bool)

    # object dtype -> iteratively check for intervals
    matches = np.zeros(len(self), dtype=bool)
    for pos, candidate in enumerate(other):
        # need object to be an Interval with same closed and endpoints
        if not isinstance(candidate, Interval):
            continue
        if (
            self.closed == candidate.closed
            and self.left[pos] == candidate.left
            and self.right[pos] == candidate.right
        ):
            matches[pos] = True

    return matches

608 

609 def __ne__(self, other): 

610 return ~self.__eq__(other) 

611 

def fillna(self, value=None, method=None, limit=None):
    """
    Fill missing values with a scalar Interval.

    Parameters
    ----------
    value : Interval
        Replacement for missing entries; its closed side is checked
        against this array.
    method : None
        Not supported for IntervalArray; must be None.
    limit : None
        Not supported for IntervalArray; must be None.

    Returns
    -------
    IntervalArray
        New array with both sides filled from ``value``.

    Raises
    ------
    TypeError
        If ``method`` or ``limit`` is given, or ``value`` is not an
        Interval.
    """
    if method is not None:
        raise TypeError("Filling by method is not supported for IntervalArray.")
    if limit is not None:
        raise TypeError("limit is not supported for IntervalArray.")

    if not isinstance(value, ABCInterval):
        raise TypeError(
            "'IntervalArray.fillna' only supports filling with a "
            f"scalar 'pandas.Interval'. Got a '{type(value).__name__}' instead."
        )

    value = getattr(value, "_values", value)
    self._check_closed_matches(value, name="value")

    filled_left = self.left.fillna(value=value.left)
    filled_right = self.right.fillna(value=value.right)
    return self._shallow_copy(filled_left, filled_right)

657 

@property
def dtype(self):
    """Interval dtype built from the dtype of the left bounds."""
    subtype = self.left.dtype
    return IntervalDtype(subtype)

661 

def astype(self, dtype, copy=True):
    """
    Cast to an ExtensionArray or NumPy array with dtype 'dtype'.

    Parameters
    ----------
    dtype : str or dtype
        Typecode or data-type to which the array is cast.
    copy : bool, default True
        Whether to copy the data, even if not necessary. If False,
        a copy is made only if the old dtype does not match the
        new dtype.

    Returns
    -------
    array : ExtensionArray or ndarray
        ExtensionArray or NumPy ndarray with 'dtype' for its dtype.

    Raises
    ------
    TypeError
        If the interval subtypes are incompatible or the cast to a
        non-interval dtype fails.
    """
    dtype = pandas_dtype(dtype)

    if is_interval_dtype(dtype):
        if dtype == self.dtype:
            if copy:
                return self.copy()
            return self

        # need to cast to different subtype
        target = dtype.subtype
        try:
            cast_left = self.left.astype(target)
            cast_right = self.right.astype(target)
        except TypeError:
            raise TypeError(
                f"Cannot convert {self.dtype} to {dtype}; subtypes are incompatible"
            )
        return self._shallow_copy(cast_left, cast_right)

    if is_categorical_dtype(dtype):
        return Categorical(np.asarray(self))

    # TODO: This try/except will be repeated.
    try:
        return np.asarray(self).astype(dtype, copy=copy)
    except (TypeError, ValueError):
        raise TypeError(f"Cannot cast {type(self).__name__} to dtype {dtype}")

704 

@classmethod
def _concat_same_type(cls, to_concat):
    """
    Concatenate multiple IntervalArray.

    Parameters
    ----------
    to_concat : sequence of IntervalArray

    Returns
    -------
    IntervalArray

    Raises
    ------
    ValueError
        If the arrays are not all closed on the same side.
    """
    sides = set()
    for piece in to_concat:
        sides.add(piece.closed)
    if len(sides) != 1:
        raise ValueError("Intervals must all be closed on the same side.")
    (closed,) = sides

    all_left = np.concatenate([piece.left for piece in to_concat])
    all_right = np.concatenate([piece.right for piece in to_concat])
    return cls._simple_new(all_left, all_right, closed=closed, copy=False)

726 

def _shallow_copy(self, left=None, right=None, closed=None):
    """
    Return a new IntervalArray with the replacement attributes.

    Parameters
    ----------
    left : array-like, optional
        Values for the left side of the intervals. If None, the existing
        left and right values are used.
    right : array-like, optional
        Values for the right side of the intervals. If None while ``left``
        is given, ``left`` is treated as interval-like and both sides are
        taken from it.
    closed : {'left', 'right', 'both', 'neither'}, optional
        Closed side. If None, the existing closed side is used.

    Notes
    -----
    The result is built without running the integrity check.
    """
    if left is None:
        # no values passed
        left, right = self.left, self.right
    elif right is None:
        # only single value passed, could be an IntervalArray
        # or array of Intervals
        interval_like = left
        if not isinstance(interval_like, (type(self), ABCIntervalIndex)):
            interval_like = type(self)(interval_like)
        left, right = interval_like.left, interval_like.right
    # otherwise both left and right were passed and are used as given

    if not closed:
        closed = self.closed
    return self._simple_new(left, right, closed=closed, verify_integrity=False)

766 

def copy(self):
    """
    Return a copy of the array.

    Both sides are deep-copied and a new array with the same closed side
    is built through ``from_arrays``.

    Returns
    -------
    IntervalArray
    """
    klass = type(self)
    # TODO: Could skip verify_integrity here.
    return klass.from_arrays(
        self.left.copy(deep=True),
        self.right.copy(deep=True),
        closed=self.closed,
    )

780 

def isna(self):
    """Return the missing-value mask, computed on the left bounds."""
    left_bounds = self.left
    return isna(left_bounds)

783 

@property
def nbytes(self) -> int:
    """Combined ``nbytes`` of the left and right bounds."""
    left_bytes = self.left.nbytes
    right_bytes = self.right.nbytes
    return left_bytes + right_bytes

787 

@property
def size(self) -> int:
    """Number of elements, read from the left bounds."""
    # Avoid materializing self.values
    left_bounds = self.left
    return left_bounds.size

792 

def shift(self, periods: int = 1, fill_value: object = None) -> ABCExtensionArray:
    """
    Shift values by ``periods`` positions, filling the vacated slots.

    Parameters
    ----------
    periods : int, default 1
        Number of positions to shift; negative values shift towards the
        start of the array.
    fill_value : object, optional
        Value for the vacated positions. A missing value (the default)
        fills with missing intervals.

    Returns
    -------
    IntervalArray
        Shifted array; a copy of ``self`` when the array is empty or
        ``periods`` is 0.
    """
    if not len(self) or periods == 0:
        return self.copy()

    # ExtensionArray.shift doesn't work for two reasons
    # 1. IntervalArray.dtype.na_value may not be correct for the dtype.
    # 2. IntervalArray._from_sequence only accepts NaN for missing values,
    #    not other values like NaT

    empty_len = min(abs(periods), len(self))
    if isna(fill_value):
        fill_value = self.left._na_value
        # The filler must be closed on the same side as ``self``; otherwise
        # ``_concat_same_type`` rejects the pieces for any array that is
        # not closed on the right.
        empty = IntervalArray.from_breaks(
            [fill_value] * (empty_len + 1), closed=self.closed
        )
    else:
        empty = self._from_sequence([fill_value] * empty_len)

    if periods > 0:
        a = empty
        b = self[:-periods]
    else:
        a = self[abs(periods) :]
        b = empty
    return self._concat_same_type([a, b])

819 

def take(self, indices, allow_fill=False, fill_value=None, axis=None, **kwargs):
    """
    Take elements from the IntervalArray.

    Parameters
    ----------
    indices : sequence of integers
        Indices to be taken.

    allow_fill : bool, default False
        How to handle negative values in `indices`.

        * False: negative values in `indices` indicate positional indices
          from the right (the default). This is similar to
          :func:`numpy.take`.

        * True: negative values in `indices` indicate
          missing values. These values are set to `fill_value`. Any
          other negative values raise a ``ValueError``.

    fill_value : Interval or NA, optional
        Fill value to use for NA-indices when `allow_fill` is True.
        This may be ``None``, in which case the default NA value for
        the type, ``self.dtype.na_value``, is used.

        For many ExtensionArrays, there will be two representations of
        `fill_value`: a user-facing "boxed" scalar, and a low-level
        physical NA value. `fill_value` should be the user-facing version,
        and the implementation should handle translating that to the
        physical version for processing the take if necessary.

    axis : any, default None
        Present for compat with IntervalIndex; does nothing.

    Returns
    -------
    IntervalArray

    Raises
    ------
    IndexError
        When the indices are out of bounds for the array.
    ValueError
        When `indices` contains negative values other than ``-1``
        and `allow_fill` is True.
    """
    nv.validate_take(tuple(), kwargs)

    fill_left = fill_right = fill_value
    if allow_fill:
        if fill_value is None:
            fill_left = fill_right = self.left._na_value
        elif is_interval(fill_value):
            self._check_closed_matches(fill_value, name="fill_value")
            fill_left, fill_right = fill_value.left, fill_value.right
        elif not is_scalar(fill_value) and notna(fill_value):
            # the message names this method so the error points at the
            # call that actually failed
            msg = (
                "'IntervalArray.take' only supports filling with a "
                "'scalar pandas.Interval or NA'. "
                f"Got a '{type(fill_value).__name__}' instead."
            )
            raise ValueError(msg)

    # both sides are taken with the same indices so they stay aligned
    left_take = take(
        self.left, indices, allow_fill=allow_fill, fill_value=fill_left
    )
    right_take = take(
        self.right, indices, allow_fill=allow_fill, fill_value=fill_right
    )

    return self._shallow_copy(left_take, right_take)

891 

def value_counts(self, dropna=True):
    """
    Return a Series containing counts of each interval.

    Parameters
    ----------
    dropna : bool, default True
        Don't include counts of NaN.

    Returns
    -------
    counts : Series

    See Also
    --------
    Series.value_counts
    """
    # TODO: implement this is a non-naive way!
    as_ndarray = np.asarray(self)
    return value_counts(as_ndarray, dropna=dropna)

911 

912 # Formatting 

913 

def _format_data(self):
    """
    Return the bracketed, comma-separated text of the elements.

    When there are more than two elements and more than the limit derived
    from the ``display.max_seq_items`` option, only the head and the tail
    are shown, separated by `` ... ``.
    """
    # TODO: integrate with categorical and make generic
    # name argument is unused here; just for compat with base / categorical
    n = len(self)
    max_seq_items = min((get_option("display.max_seq_items") or n) // 10, 10)

    if n == 0:
        return "[]"
    if n == 1:
        only = str(self[0])
        return f"[{only}]"
    if n == 2:
        first, last = str(self[0]), str(self[-1])
        return f"[{first}, {last}]"

    if n <= max_seq_items:
        everything = ", ".join([str(x) for x in self])
        return f"[{everything}]"

    # too many elements: show the same number from each end
    n = min(max_seq_items // 2, 10)
    head_str = ", ".join([str(x) for x in self[:n]])
    tail_str = ", ".join([str(x) for x in self[-n:]])
    return f"[{head_str} ... {tail_str}]"

947 

def __repr__(self) -> str:
    """Return a three-line repr: class tag, formatted data, summary line."""
    # The pieces are joined with newlines, so the result has no trailing
    # newline.
    header = f"<{type(self).__name__}>"
    body = self._format_data()
    footer = f"Length: {len(self)}, closed: {self.closed}, dtype: {self.dtype}"
    return "\n".join([header, body, footer])

961 

962 def _format_space(self): 

963 space = " " * (len(type(self).__name__) + 1) 

964 return f"\n{space}" 

965 

@property
def left(self):
    """
    Left endpoints of every Interval in the IntervalArray, as an Index.
    """
    left_bounds = self._left
    return left_bounds

973 

@property
def right(self):
    """
    Right endpoints of every Interval in the IntervalArray, as an Index.
    """
    right_bounds = self._right
    return right_bounds

981 

@property
def closed(self):
    """
    Side on which the intervals are closed: left, right, both or neither.
    """
    closed_side = self._closed
    return closed_side

989 

# Docstring template for ``set_closed``; ``klass`` and ``examples`` are
# substituted where the template is applied.
_interval_shared_docs["set_closed"] = textwrap.dedent(
    """
    Return an %(klass)s identical to the current one, but closed on the
    specified side.

    .. versionadded:: 0.24.0

    Parameters
    ----------
    closed : {'left', 'right', 'both', 'neither'}
        Whether the intervals are closed on the left-side, right-side, both
        or neither.

    Returns
    -------
    new_index : %(klass)s

    %(examples)s\
    """
)

@Appender(
    _interval_shared_docs["set_closed"]
    % dict(
        klass="IntervalArray",
        examples=textwrap.dedent(
            """\
    Examples
    --------
    >>> index = pd.arrays.IntervalArray.from_breaks(range(4))
    >>> index
    <IntervalArray>
    [(0, 1], (1, 2], (2, 3]]
    Length: 3, closed: right, dtype: interval[int64]
    >>> index.set_closed('both')
    <IntervalArray>
    [[0, 1], [1, 2], [2, 3]]
    Length: 3, closed: both, dtype: interval[int64]
    """
        ),
    )
)
def set_closed(self, closed):
    # No docstring here: the documentation is attached by ``Appender``.
    if closed in _VALID_CLOSED:
        return self._shallow_copy(closed=closed)

    raise ValueError(f"invalid option for 'closed': {closed}")

1038 

1039 @property 

1040 def length(self): 

1041 """ 

1042 Return an Index with entries denoting the length of each Interval in 

1043 the IntervalArray. 

1044 """ 

1045 try: 

1046 return self.right - self.left 

1047 except TypeError: 

1048 # length not defined for some types, e.g. string 

1049 msg = ( 

1050 "IntervalArray contains Intervals without defined length, " 

1051 "e.g. Intervals with string endpoints" 

1052 ) 

1053 raise TypeError(msg) 

1054 

1055 @property 

1056 def mid(self): 

1057 """ 

1058 Return the midpoint of each Interval in the IntervalArray as an Index. 

1059 """ 

1060 try: 

1061 return 0.5 * (self.left + self.right) 

1062 except TypeError: 

1063 # datetime safe version 

1064 return self.left + 0.5 * self.length 

1065 

    # Docstring template for ``is_non_overlapping_monotonic``; ``%(klass)s`` is
    # substituted where the template is applied.
    _interval_shared_docs[
        "is_non_overlapping_monotonic"
    ] = """
        Return True if the %(klass)s is non-overlapping (no Intervals share
        points) and is either monotonic increasing or monotonic decreasing,
        else False.
        """

1073 # https://github.com/python/mypy/issues/1362 

1074 # Mypy does not support decorated properties 

1075 @property # type: ignore 

1076 @Appender( 

1077 _interval_shared_docs["is_non_overlapping_monotonic"] % _shared_docs_kwargs 

1078 ) 

1079 def is_non_overlapping_monotonic(self): 

1080 # must be increasing (e.g., [0, 1), [1, 2), [2, 3), ... ) 

1081 # or decreasing (e.g., [-1, 0), [-2, -1), [-3, -2), ...) 

1082 # we already require left <= right 

1083 

1084 # strict inequality for closed == 'both'; equality implies overlapping 

1085 # at a point when both sides of intervals are included 

1086 if self.closed == "both": 

1087 return bool( 

1088 (self.right[:-1] < self.left[1:]).all() 

1089 or (self.left[:-1] > self.right[1:]).all() 

1090 ) 

1091 

1092 # non-strict inequality when closed != 'both'; at least one side is 

1093 # not included in the intervals, so equality does not imply overlapping 

1094 return bool( 

1095 (self.right[:-1] <= self.left[1:]).all() 

1096 or (self.left[:-1] >= self.right[1:]).all() 

1097 ) 

1098 

1099 # Conversion 

1100 def __array__(self, dtype=None) -> np.ndarray: 

1101 """ 

1102 Return the IntervalArray's data as a numpy array of Interval 

1103 objects (with dtype='object') 

1104 """ 

1105 left = self.left 

1106 right = self.right 

1107 mask = self.isna() 

1108 closed = self._closed 

1109 

1110 result = np.empty(len(left), dtype=object) 

1111 for i in range(len(left)): 

1112 if mask[i]: 

1113 result[i] = np.nan 

1114 else: 

1115 result[i] = Interval(left[i], right[i], closed) 

1116 return result 

1117 

1118 def __arrow_array__(self, type=None): 

1119 """ 

1120 Convert myself into a pyarrow Array. 

1121 """ 

1122 import pyarrow 

1123 from pandas.core.arrays._arrow_utils import ArrowIntervalType 

1124 

1125 try: 

1126 subtype = pyarrow.from_numpy_dtype(self.dtype.subtype) 

1127 except TypeError: 

1128 raise TypeError( 

1129 "Conversion to arrow with subtype '{}' " 

1130 "is not supported".format(self.dtype.subtype) 

1131 ) 

1132 interval_type = ArrowIntervalType(subtype, self.closed) 

1133 storage_array = pyarrow.StructArray.from_arrays( 

1134 [ 

1135 pyarrow.array(self.left, type=subtype, from_pandas=True), 

1136 pyarrow.array(self.right, type=subtype, from_pandas=True), 

1137 ], 

1138 names=["left", "right"], 

1139 ) 

1140 mask = self.isna() 

1141 if mask.any(): 

1142 # if there are missing values, set validity bitmap also on the array level 

1143 null_bitmap = pyarrow.array(~mask).buffers()[1] 

1144 storage_array = pyarrow.StructArray.from_buffers( 

1145 storage_array.type, 

1146 len(storage_array), 

1147 [null_bitmap], 

1148 children=[storage_array.field(0), storage_array.field(1)], 

1149 ) 

1150 

1151 if type is not None: 

1152 if type.equals(interval_type.storage_type): 

1153 return storage_array 

1154 elif isinstance(type, ArrowIntervalType): 

1155 # ensure we have the same subtype and closed attributes 

1156 if not type.equals(interval_type): 

1157 raise TypeError( 

1158 "Not supported to convert IntervalArray to type with " 

1159 "different 'subtype' ({0} vs {1}) and 'closed' ({2} vs {3}) " 

1160 "attributes".format( 

1161 self.dtype.subtype, type.subtype, self.closed, type.closed 

1162 ) 

1163 ) 

1164 else: 

1165 raise TypeError( 

1166 "Not supported to convert IntervalArray to '{0}' type".format(type) 

1167 ) 

1168 

1169 return pyarrow.ExtensionArray.from_storage(interval_type, storage_array) 

1170 

    # Docstring template for ``to_tuples``; ``%(return_type)s`` and
    # ``%(examples)s`` are substituted where the template is applied.
    _interval_shared_docs[
        "to_tuples"
    ] = """
        Return an %(return_type)s of tuples of the form (left, right).

        Parameters
        ----------
        na_tuple : boolean, default True
            Returns NA as a tuple if True, ``(nan, nan)``, or just as the NA
            value itself if False, ``nan``.

            .. versionadded:: 0.23.0

        Returns
        -------
        tuples: %(return_type)s
        %(examples)s\
        """

1189 

1190 @Appender( 

1191 _interval_shared_docs["to_tuples"] % dict(return_type="ndarray", examples="") 

1192 ) 

1193 def to_tuples(self, na_tuple=True): 

1194 tuples = com.asarray_tuplesafe(zip(self.left, self.right)) 

1195 if not na_tuple: 

1196 # GH 18756 

1197 tuples = np.where(~self.isna(), tuples, np.nan) 

1198 return tuples 

1199 

1200 @Appender(_extension_array_shared_docs["repeat"] % _shared_docs_kwargs) 

1201 def repeat(self, repeats, axis=None): 

1202 nv.validate_repeat(tuple(), dict(axis=axis)) 

1203 left_repeat = self.left.repeat(repeats) 

1204 right_repeat = self.right.repeat(repeats) 

1205 return self._shallow_copy(left=left_repeat, right=right_repeat) 

1206 

    # Docstring template for ``contains``; ``%(klass)s`` and ``%(examples)s``
    # are substituted where the template is applied.
    _interval_shared_docs["contains"] = textwrap.dedent(
        """
        Check elementwise if the Intervals contain the value.

        Return a boolean mask whether the value is contained in the Intervals
        of the %(klass)s.

        .. versionadded:: 0.25.0

        Parameters
        ----------
        other : scalar
            The value to check whether it is contained in the Intervals.

        Returns
        -------
        boolean array

        See Also
        --------
        Interval.contains : Check whether Interval object contains value.
        %(klass)s.overlaps : Check if an Interval overlaps the values in the
            %(klass)s.

        Examples
        --------
        %(examples)s
        >>> intervals.contains(0.5)
        array([ True, False, False])
    """
    )

1238 

1239 @Appender( 

1240 _interval_shared_docs["contains"] 

1241 % dict( 

1242 klass="IntervalArray", 

1243 examples=textwrap.dedent( 

1244 """\ 

1245 >>> intervals = pd.arrays.IntervalArray.from_tuples([(0, 1), (1, 3), (2, 4)]) 

1246 >>> intervals 

1247 <IntervalArray> 

1248 [(0, 1], (1, 3], (2, 4]] 

1249 Length: 3, closed: right, dtype: interval[int64] 

1250 """ 

1251 ), 

1252 ) 

1253 ) 

1254 def contains(self, other): 

1255 if isinstance(other, Interval): 

1256 raise NotImplementedError("contains not implemented for two intervals") 

1257 

1258 return (self.left < other if self.open_left else self.left <= other) & ( 

1259 other < self.right if self.open_right else other <= self.right 

1260 ) 

1261 

    # Docstring template for ``overlaps``; ``%(klass)s`` and ``%(examples)s``
    # are substituted where the template is applied.
    _interval_shared_docs["overlaps"] = textwrap.dedent(
        """
        Check elementwise if an Interval overlaps the values in the %(klass)s.

        Two intervals overlap if they share a common point, including closed
        endpoints. Intervals that only have an open endpoint in common do not
        overlap.

        .. versionadded:: 0.24.0

        Parameters
        ----------
        other : %(klass)s
            Interval to check against for an overlap.

        Returns
        -------
        ndarray
            Boolean array positionally indicating where an overlap occurs.

        See Also
        --------
        Interval.overlaps : Check whether two Interval objects overlap.

        Examples
        --------
        %(examples)s
        >>> intervals.overlaps(pd.Interval(0.5, 1.5))
        array([ True,  True, False])

        Intervals that share closed endpoints overlap:

        >>> intervals.overlaps(pd.Interval(1, 3, closed='left'))
        array([ True,  True,  True])

        Intervals that only have an open endpoint in common do not overlap:

        >>> intervals.overlaps(pd.Interval(1, 2, closed='right'))
        array([False,  True, False])
        """
    )

1303 

1304 @Appender( 

1305 _interval_shared_docs["overlaps"] 

1306 % dict( 

1307 klass="IntervalArray", 

1308 examples=textwrap.dedent( 

1309 """\ 

1310 >>> data = [(0, 1), (1, 3), (2, 4)] 

1311 >>> intervals = pd.arrays.IntervalArray.from_tuples(data) 

1312 >>> intervals 

1313 <IntervalArray> 

1314 [(0, 1], (1, 3], (2, 4]] 

1315 Length: 3, closed: right, dtype: interval[int64] 

1316 """ 

1317 ), 

1318 ) 

1319 ) 

1320 def overlaps(self, other): 

1321 if isinstance(other, (IntervalArray, ABCIntervalIndex)): 

1322 raise NotImplementedError 

1323 elif not isinstance(other, Interval): 

1324 msg = f"`other` must be Interval-like, got {type(other).__name__}" 

1325 raise TypeError(msg) 

1326 

1327 # equality is okay if both endpoints are closed (overlap at a point) 

1328 op1 = le if (self.closed_left and other.closed_right) else lt 

1329 op2 = le if (other.closed_left and self.closed_right) else lt 

1330 

1331 # overlaps is equivalent negation of two interval being disjoint: 

1332 # disjoint = (A.left > B.right) or (B.left > A.right) 

1333 # (simplifying the negation allows this to be done in less operations) 

1334 return op1(self.left, other.right) & op2(other.left, self.right) 

1335 

1336 

def maybe_convert_platform_interval(values):
    """
    Platform-convert ``values``, with adjustments needed by IntervalArray.

    This wraps ``maybe_convert_platform`` and handles two inputs specially:
    an empty list or tuple is returned as an empty int64 array (object dtype
    is prohibited for IntervalArray), and categorical input is converted to a
    numpy array before the platform conversion.

    Parameters
    ----------
    values : array-like

    Returns
    -------
    array
    """
    is_empty_builtin_sequence = isinstance(values, (list, tuple)) and len(values) == 0
    if is_empty_builtin_sequence:
        # GH 19016
        # empty lists/tuples would otherwise come back with object dtype,
        # so coerce to integer instead
        return np.array([], dtype=np.int64)

    if is_categorical_dtype(values):
        values = np.asarray(values)

    return maybe_convert_platform(values)