Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1import datetime 

2from sys import getsizeof 

3from typing import Hashable, List, Optional, Sequence, Union 

4import warnings 

5 

6import numpy as np 

7 

8from pandas._config import get_option 

9 

10from pandas._libs import Timestamp, algos as libalgos, index as libindex, lib, tslibs 

11from pandas._libs.hashtable import duplicated_int64 

12from pandas.compat.numpy import function as nv 

13from pandas.errors import PerformanceWarning, UnsortedIndexError 

14from pandas.util._decorators import Appender, cache_readonly 

15 

16from pandas.core.dtypes.cast import coerce_indexer_dtype 

17from pandas.core.dtypes.common import ( 

18 ensure_int64, 

19 ensure_platform_int, 

20 is_categorical_dtype, 

21 is_hashable, 

22 is_integer, 

23 is_iterator, 

24 is_list_like, 

25 is_object_dtype, 

26 is_scalar, 

27 pandas_dtype, 

28) 

29from pandas.core.dtypes.dtypes import ExtensionDtype 

30from pandas.core.dtypes.generic import ABCDataFrame 

31from pandas.core.dtypes.missing import array_equivalent, isna 

32 

33import pandas.core.algorithms as algos 

34from pandas.core.arrays import Categorical 

35from pandas.core.arrays.categorical import factorize_from_iterables 

36import pandas.core.common as com 

37import pandas.core.indexes.base as ibase 

38from pandas.core.indexes.base import ( 

39 Index, 

40 InvalidIndexError, 

41 _index_shared_docs, 

42 ensure_index, 

43) 

44from pandas.core.indexes.frozen import FrozenList 

45import pandas.core.missing as missing 

46from pandas.core.sorting import ( 

47 get_group_index, 

48 indexer_from_factorized, 

49 lexsort_indexer, 

50) 

51from pandas.core.util.hashing import hash_tuple, hash_tuples 

52 

53from pandas.io.formats.printing import ( 

54 format_object_attrs, 

55 format_object_summary, 

56 pprint_thing, 

57) 

58 

# Shared docstring-substitution values, copied from the base index module and
# specialized for MultiIndex (used by the @Appender decorators below).
_index_doc_kwargs = dict(ibase._index_doc_kwargs)
_index_doc_kwargs.update(
    dict(klass="MultiIndex", target_klass="MultiIndex or list of tuples")
)

63 

64 

class MultiIndexUIntEngine(libindex.BaseMultiIndexCodesEngine, libindex.UInt64Engine):
    """
    Manage a MultiIndex by mapping label combinations to positive integers,
    backed by a uint64 engine.
    """

    _base = libindex.UInt64Engine

    def _codes_to_ints(self, codes):
        """
        Transform combination(s) of uint64 in one uint64 (each), in a strictly
        monotonic way (i.e. respecting the lexicographic order of integer
        combinations): see BaseMultiIndexCodesEngine documentation.

        Parameters
        ----------
        codes : 1- or 2-dimensional array of dtype uint64
            Combinations of integers (one per row)

        Returns
        -------
        scalar or 1-dimensional array, of dtype uint64
            Integer(s) representing one combination (each).
        """
        # Shift each level's codes into its own pre-computed, disjoint bit
        # range.  NB: deliberately in-place, as in the original.
        codes <<= self.offsets

        # With disjoint significant bits, OR-ing composes the shifted codes
        # into a single positive integer per combination.  Reducing over the
        # last axis covers both the single-key (1-D -> scalar) and the
        # multiple-key (2-D -> 1-D) cases uniformly.
        return np.bitwise_or.reduce(codes, axis=codes.ndim - 1)

102 

103 

class MultiIndexPyIntEngine(libindex.BaseMultiIndexCodesEngine, libindex.ObjectEngine):
    """
    Manage those (extreme) cases in which the number of possible label
    combinations overflows the 64 bit integers; backed by an ObjectEngine
    holding arbitrary-precision Python integers.
    """

    _base = libindex.ObjectEngine

    def _codes_to_ints(self, codes):
        """
        Transform combination(s) of uint64 in one Python integer (each), in a
        strictly monotonic way (i.e. respecting the lexicographic order of
        integer combinations): see BaseMultiIndexCodesEngine documentation.

        Parameters
        ----------
        codes : 1- or 2-dimensional array of dtype uint64
            Combinations of integers (one per row)

        Returns
        -------
        int, or 1-dimensional array of dtype object
            Integer(s) representing one combination (each).
        """
        # The shifts can overflow uint64, so first move to Python ints
        # (object dtype), then shift each level into its own bit range.
        shifted = codes.astype("object") << self.offsets

        # Disjoint significant bits: OR over the last axis composes one
        # Python int per combination, for both 1-D and 2-D input.
        return np.bitwise_or.reduce(shifted, axis=shifted.ndim - 1)

144 

145 

146class MultiIndex(Index): 

147 """ 

148 A multi-level, or hierarchical, index object for pandas objects. 

149 

150 Parameters 

151 ---------- 

152 levels : sequence of arrays 

153 The unique labels for each level. 

154 codes : sequence of arrays 

155 Integers for each level designating which label at each location. 

156 

157 .. versionadded:: 0.24.0 

158 sortorder : optional int 

159 Level of sortedness (must be lexicographically sorted by that 

160 level). 

161 names : optional sequence of objects 

162 Names for each of the index levels. (name is accepted for compat). 

163 copy : bool, default False 

164 Copy the meta-data. 

165 verify_integrity : bool, default True 

166 Check that the levels/codes are consistent and valid. 

167 

168 Attributes 

169 ---------- 

170 names 

171 levels 

172 codes 

173 nlevels 

174 levshape 

175 

176 Methods 

177 ------- 

178 from_arrays 

179 from_tuples 

180 from_product 

181 from_frame 

182 set_levels 

183 set_codes 

184 to_frame 

185 to_flat_index 

186 is_lexsorted 

187 sortlevel 

188 droplevel 

189 swaplevel 

190 reorder_levels 

191 remove_unused_levels 

192 get_locs 

193 

194 See Also 

195 -------- 

196 MultiIndex.from_arrays : Convert list of arrays to MultiIndex. 

197 MultiIndex.from_product : Create a MultiIndex from the cartesian product 

198 of iterables. 

199 MultiIndex.from_tuples : Convert list of tuples to a MultiIndex. 

200 MultiIndex.from_frame : Make a MultiIndex from a DataFrame. 

201 Index : The base pandas Index type. 

202 

203 Notes 

204 ----- 

205 See the `user guide 

206 <https://pandas.pydata.org/pandas-docs/stable/user_guide/advanced.html>`_ 

207 for more. 

208 

209 Examples 

210 -------- 

211 A new ``MultiIndex`` is typically constructed using one of the helper 

212 methods :meth:`MultiIndex.from_arrays`, :meth:`MultiIndex.from_product` 

213 and :meth:`MultiIndex.from_tuples`. For example (using ``.from_arrays``): 

214 

215 >>> arrays = [[1, 1, 2, 2], ['red', 'blue', 'red', 'blue']] 

216 >>> pd.MultiIndex.from_arrays(arrays, names=('number', 'color')) 

217 MultiIndex([(1, 'red'), 

218 (1, 'blue'), 

219 (2, 'red'), 

220 (2, 'blue')], 

221 names=['number', 'color']) 

222 

223 See further examples for how to construct a MultiIndex in the doc strings 

224 of the mentioned helper methods. 

225 """ 

226 

227 _deprecations = Index._deprecations | frozenset() 

228 

229 # initialize to zero-length tuples to make everything work 

230 _typ = "multiindex" 

231 _names = FrozenList() 

232 _levels = FrozenList() 

233 _codes = FrozenList() 

234 _comparables = ["names"] 

235 rename = Index.set_names 

236 

237 # -------------------------------------------------------------------- 

238 # Constructors 

239 

    def __new__(
        cls,
        levels=None,
        codes=None,
        sortorder=None,
        names=None,
        dtype=None,
        copy=False,
        name=None,
        verify_integrity: bool = True,
        _set_identity: bool = True,
    ):
        """
        Construct a MultiIndex from ``levels`` and ``codes``.

        NOTE: ``dtype`` is accepted for signature compatibility with
        ``Index`` but is not used in this body.
        """

        # compat with Index
        if name is not None:
            names = name
        if levels is None or codes is None:
            raise TypeError("Must pass both levels and codes")
        if len(levels) != len(codes):
            raise ValueError("Length of levels and codes must be the same.")
        if len(levels) == 0:
            raise ValueError("Must pass non-zero number of levels/codes")

        # bypass Index.__new__; MultiIndex manages its own storage
        result = object.__new__(MultiIndex)

        # we've already validated levels and codes, so shortcut here
        result._set_levels(levels, copy=copy, validate=False)
        result._set_codes(codes, copy=copy, validate=False)

        result._names = [None] * len(levels)
        if names is not None:
            # handles name validation
            result._set_names(names)

        # coerce sortorder to int when given; None means "unknown"
        if sortorder is not None:
            result.sortorder = int(sortorder)
        else:
            result.sortorder = sortorder

        if verify_integrity:
            # _verify_integrity may remap codes (e.g. -1 for NaN levels)
            new_codes = result._verify_integrity()
            result._codes = new_codes

        if _set_identity:
            result._reset_identity()

        return result

287 

288 def _validate_codes(self, level: List, code: List): 

289 """ 

290 Reassign code values as -1 if their corresponding levels are NaN. 

291 

292 Parameters 

293 ---------- 

294 code : list 

295 Code to reassign. 

296 level : list 

297 Level to check for missing values (NaN, NaT, None). 

298 

299 Returns 

300 ------- 

301 new code where code value = -1 if it corresponds 

302 to a level with missing values (NaN, NaT, None). 

303 """ 

304 null_mask = isna(level) 

305 if np.any(null_mask): 

306 code = np.where(null_mask[code], -1, code) 

307 return code 

308 

    def _verify_integrity(
        self, codes: Optional[List] = None, levels: Optional[List] = None
    ):
        """
        Parameters
        ----------
        codes : optional list
            Codes to check for validity. Defaults to current codes.
        levels : optional list
            Levels to check for validity. Defaults to current levels.

        Raises
        ------
        ValueError
            If length of levels and codes don't match, if the codes for any
            level would exceed level bounds, or there are any duplicate levels.

        Returns
        -------
        new codes where code value = -1 if it corresponds to a
        NaN level.
        """
        # NOTE: Currently does not check, among other things, that cached
        # nlevels matches nor that sortorder matches actually sortorder.
        # NOTE(review): ``or`` falls back to self.* for ANY falsy argument,
        # including an explicitly passed empty list — confirm intended.
        codes = codes or self.codes
        levels = levels or self.levels

        if len(levels) != len(codes):
            raise ValueError(
                "Length of levels and codes must match. NOTE: "
                "this index is in an inconsistent state."
            )
        # every level's codes must describe the same number of rows
        codes_length = len(codes[0])
        for i, (level, level_codes) in enumerate(zip(levels, codes)):
            if len(level_codes) != codes_length:
                raise ValueError(
                    f"Unequal code lengths: {[len(code_) for code_ in codes]}"
                )
            # each code must index inside its level (-1 is the NaN sentinel)
            if len(level_codes) and level_codes.max() >= len(level):
                raise ValueError(
                    f"On level {i}, code max ({level_codes.max()}) >= length of "
                    f"level ({len(level)}). NOTE: this index is in an "
                    "inconsistent state"
                )
            if len(level_codes) and level_codes.min() < -1:
                raise ValueError(f"On level {i}, code value ({level_codes.min()}) < -1")
            if not level.is_unique:
                raise ValueError(
                    f"Level values must be unique: {list(level)} on level {i}"
                )
        if self.sortorder is not None:
            # a claimed sortorder cannot exceed what the codes actually show
            if self.sortorder > self._lexsort_depth():
                raise ValueError(
                    "Value for sortorder must be inferior or equal to actual "
                    f"lexsort_depth: sortorder {self.sortorder} "
                    f"with lexsort_depth {self._lexsort_depth()}"
                )

        # remap any codes pointing at NaN level values to the -1 sentinel
        codes = [
            self._validate_codes(level, code) for level, code in zip(levels, codes)
        ]
        new_codes = FrozenList(codes)
        return new_codes

372 

373 @classmethod 

374 def from_arrays(cls, arrays, sortorder=None, names=lib.no_default): 

375 """ 

376 Convert arrays to MultiIndex. 

377 

378 Parameters 

379 ---------- 

380 arrays : list / sequence of array-likes 

381 Each array-like gives one level's value for each data point. 

382 len(arrays) is the number of levels. 

383 sortorder : int or None 

384 Level of sortedness (must be lexicographically sorted by that 

385 level). 

386 names : list / sequence of str, optional 

387 Names for the levels in the index. 

388 

389 Returns 

390 ------- 

391 MultiIndex 

392 

393 See Also 

394 -------- 

395 MultiIndex.from_tuples : Convert list of tuples to MultiIndex. 

396 MultiIndex.from_product : Make a MultiIndex from cartesian product 

397 of iterables. 

398 MultiIndex.from_frame : Make a MultiIndex from a DataFrame. 

399 

400 Examples 

401 -------- 

402 >>> arrays = [[1, 1, 2, 2], ['red', 'blue', 'red', 'blue']] 

403 >>> pd.MultiIndex.from_arrays(arrays, names=('number', 'color')) 

404 MultiIndex([(1, 'red'), 

405 (1, 'blue'), 

406 (2, 'red'), 

407 (2, 'blue')], 

408 names=['number', 'color']) 

409 """ 

410 error_msg = "Input must be a list / sequence of array-likes." 

411 if not is_list_like(arrays): 

412 raise TypeError(error_msg) 

413 elif is_iterator(arrays): 

414 arrays = list(arrays) 

415 

416 # Check if elements of array are list-like 

417 for array in arrays: 

418 if not is_list_like(array): 

419 raise TypeError(error_msg) 

420 

421 # Check if lengths of all arrays are equal or not, 

422 # raise ValueError, if not 

423 for i in range(1, len(arrays)): 

424 if len(arrays[i]) != len(arrays[i - 1]): 

425 raise ValueError("all arrays must be same length") 

426 

427 codes, levels = factorize_from_iterables(arrays) 

428 if names is lib.no_default: 

429 names = [getattr(arr, "name", None) for arr in arrays] 

430 

431 return MultiIndex( 

432 levels=levels, 

433 codes=codes, 

434 sortorder=sortorder, 

435 names=names, 

436 verify_integrity=False, 

437 ) 

438 

439 @classmethod 

440 def from_tuples(cls, tuples, sortorder=None, names=None): 

441 """ 

442 Convert list of tuples to MultiIndex. 

443 

444 Parameters 

445 ---------- 

446 tuples : list / sequence of tuple-likes 

447 Each tuple is the index of one row/column. 

448 sortorder : int or None 

449 Level of sortedness (must be lexicographically sorted by that 

450 level). 

451 names : list / sequence of str, optional 

452 Names for the levels in the index. 

453 

454 Returns 

455 ------- 

456 MultiIndex 

457 

458 See Also 

459 -------- 

460 MultiIndex.from_arrays : Convert list of arrays to MultiIndex. 

461 MultiIndex.from_product : Make a MultiIndex from cartesian product 

462 of iterables. 

463 MultiIndex.from_frame : Make a MultiIndex from a DataFrame. 

464 

465 Examples 

466 -------- 

467 >>> tuples = [(1, 'red'), (1, 'blue'), 

468 ... (2, 'red'), (2, 'blue')] 

469 >>> pd.MultiIndex.from_tuples(tuples, names=('number', 'color')) 

470 MultiIndex([(1, 'red'), 

471 (1, 'blue'), 

472 (2, 'red'), 

473 (2, 'blue')], 

474 names=['number', 'color']) 

475 """ 

476 if not is_list_like(tuples): 

477 raise TypeError("Input must be a list / sequence of tuple-likes.") 

478 elif is_iterator(tuples): 

479 tuples = list(tuples) 

480 

481 if len(tuples) == 0: 

482 if names is None: 

483 raise TypeError("Cannot infer number of levels from empty list") 

484 arrays = [[]] * len(names) 

485 elif isinstance(tuples, (np.ndarray, Index)): 

486 if isinstance(tuples, Index): 

487 tuples = tuples._values 

488 

489 arrays = list(lib.tuples_to_object_array(tuples).T) 

490 elif isinstance(tuples, list): 

491 arrays = list(lib.to_object_array_tuples(tuples).T) 

492 else: 

493 arrays = zip(*tuples) 

494 

495 return MultiIndex.from_arrays(arrays, sortorder=sortorder, names=names) 

496 

497 @classmethod 

498 def from_product(cls, iterables, sortorder=None, names=lib.no_default): 

499 """ 

500 Make a MultiIndex from the cartesian product of multiple iterables. 

501 

502 Parameters 

503 ---------- 

504 iterables : list / sequence of iterables 

505 Each iterable has unique labels for each level of the index. 

506 sortorder : int or None 

507 Level of sortedness (must be lexicographically sorted by that 

508 level). 

509 names : list / sequence of str, optional 

510 Names for the levels in the index. 

511 

512 .. versionchanged:: 1.0.0 

513 

514 If not explicitly provided, names will be inferred from the 

515 elements of iterables if an element has a name attribute 

516 

517 Returns 

518 ------- 

519 MultiIndex 

520 

521 See Also 

522 -------- 

523 MultiIndex.from_arrays : Convert list of arrays to MultiIndex. 

524 MultiIndex.from_tuples : Convert list of tuples to MultiIndex. 

525 MultiIndex.from_frame : Make a MultiIndex from a DataFrame. 

526 

527 Examples 

528 -------- 

529 >>> numbers = [0, 1, 2] 

530 >>> colors = ['green', 'purple'] 

531 >>> pd.MultiIndex.from_product([numbers, colors], 

532 ... names=['number', 'color']) 

533 MultiIndex([(0, 'green'), 

534 (0, 'purple'), 

535 (1, 'green'), 

536 (1, 'purple'), 

537 (2, 'green'), 

538 (2, 'purple')], 

539 names=['number', 'color']) 

540 """ 

541 from pandas.core.reshape.util import cartesian_product 

542 

543 if not is_list_like(iterables): 

544 raise TypeError("Input must be a list / sequence of iterables.") 

545 elif is_iterator(iterables): 

546 iterables = list(iterables) 

547 

548 codes, levels = factorize_from_iterables(iterables) 

549 if names is lib.no_default: 

550 names = [getattr(it, "name", None) for it in iterables] 

551 

552 codes = cartesian_product(codes) 

553 return MultiIndex(levels, codes, sortorder=sortorder, names=names) 

554 

555 @classmethod 

556 def from_frame(cls, df, sortorder=None, names=None): 

557 """ 

558 Make a MultiIndex from a DataFrame. 

559 

560 .. versionadded:: 0.24.0 

561 

562 Parameters 

563 ---------- 

564 df : DataFrame 

565 DataFrame to be converted to MultiIndex. 

566 sortorder : int, optional 

567 Level of sortedness (must be lexicographically sorted by that 

568 level). 

569 names : list-like, optional 

570 If no names are provided, use the column names, or tuple of column 

571 names if the columns is a MultiIndex. If a sequence, overwrite 

572 names with the given sequence. 

573 

574 Returns 

575 ------- 

576 MultiIndex 

577 The MultiIndex representation of the given DataFrame. 

578 

579 See Also 

580 -------- 

581 MultiIndex.from_arrays : Convert list of arrays to MultiIndex. 

582 MultiIndex.from_tuples : Convert list of tuples to MultiIndex. 

583 MultiIndex.from_product : Make a MultiIndex from cartesian product 

584 of iterables. 

585 

586 Examples 

587 -------- 

588 >>> df = pd.DataFrame([['HI', 'Temp'], ['HI', 'Precip'], 

589 ... ['NJ', 'Temp'], ['NJ', 'Precip']], 

590 ... columns=['a', 'b']) 

591 >>> df 

592 a b 

593 0 HI Temp 

594 1 HI Precip 

595 2 NJ Temp 

596 3 NJ Precip 

597 

598 >>> pd.MultiIndex.from_frame(df) 

599 MultiIndex([('HI', 'Temp'), 

600 ('HI', 'Precip'), 

601 ('NJ', 'Temp'), 

602 ('NJ', 'Precip')], 

603 names=['a', 'b']) 

604 

605 Using explicit names, instead of the column names 

606 

607 >>> pd.MultiIndex.from_frame(df, names=['state', 'observation']) 

608 MultiIndex([('HI', 'Temp'), 

609 ('HI', 'Precip'), 

610 ('NJ', 'Temp'), 

611 ('NJ', 'Precip')], 

612 names=['state', 'observation']) 

613 """ 

614 if not isinstance(df, ABCDataFrame): 

615 raise TypeError("Input must be a DataFrame") 

616 

617 column_names, columns = zip(*df.items()) 

618 names = column_names if names is None else names 

619 return cls.from_arrays(columns, sortorder=sortorder, names=names) 

620 

621 # -------------------------------------------------------------------- 

622 

    @property
    def _values(self):
        # We override here, since our parent uses _data, which we don't use.
        # MultiIndex stores levels/codes instead of a single backing array.
        return self.values

627 

    @property
    def shape(self):
        """
        Return a tuple of the shape of the underlying data.
        """
        # overriding the base Index.shape definition to avoid materializing
        # the values (GH-27384, GH-27775); the length alone determines it
        return (len(self),)

636 

637 @property 

638 def array(self): 

639 """ 

640 Raises a ValueError for `MultiIndex` because there's no single 

641 array backing a MultiIndex. 

642 

643 Raises 

644 ------ 

645 ValueError 

646 """ 

647 raise ValueError( 

648 "MultiIndex has no single backing array. Use " 

649 "'MultiIndex.to_numpy()' to get a NumPy array of tuples." 

650 ) 

651 

652 # -------------------------------------------------------------------- 

653 # Levels Methods 

654 

655 @cache_readonly 

656 def levels(self): 

657 # Use cache_readonly to ensure that self.get_locs doesn't repeatedly 

658 # create new IndexEngine 

659 # https://github.com/pandas-dev/pandas/issues/31648 

660 result = [ 

661 x._shallow_copy(name=name) for x, name in zip(self._levels, self._names) 

662 ] 

663 for level in result: 

664 # disallow midx.levels[0].name = "foo" 

665 level._no_setting_name = True 

666 return FrozenList(result) 

667 

    def _set_levels(
        self, levels, level=None, copy=False, validate=True, verify_integrity=False
    ):
        # This is NOT part of the levels property because it should be
        # externally not allowed to set levels. User beware if you change
        # _levels directly
        if validate:
            if len(levels) == 0:
                raise ValueError("Must set non-zero number of levels.")
            if level is None and len(levels) != self.nlevels:
                raise ValueError("Length of levels must match number of levels.")
            if level is not None and len(levels) != len(level):
                raise ValueError("Length of levels must match length of level.")

        if level is None:
            # replace every level
            new_levels = FrozenList(
                ensure_index(lev, copy=copy)._shallow_copy() for lev in levels
            )
        else:
            # replace only the requested levels; keep the rest as-is
            level_numbers = [self._get_level_number(lev) for lev in level]
            new_levels = list(self._levels)
            for lev_num, lev in zip(level_numbers, levels):
                new_levels[lev_num] = ensure_index(lev, copy=copy)._shallow_copy()
            new_levels = FrozenList(new_levels)

        if verify_integrity:
            # may remap codes (e.g. -1 for codes pointing at NaN levels)
            new_codes = self._verify_integrity(levels=new_levels)
            self._codes = new_codes

        # re-apply names after swapping levels, since the new level Index
        # objects were built without them
        names = self.names
        self._levels = new_levels
        if any(names):
            self._set_names(names)

        # invalidate the cached tuple representation and other caches
        self._tuples = None
        self._reset_cache()

704 

705 def set_levels(self, levels, level=None, inplace=False, verify_integrity=True): 

706 """ 

707 Set new levels on MultiIndex. Defaults to returning new index. 

708 

709 Parameters 

710 ---------- 

711 levels : sequence or list of sequence 

712 New level(s) to apply. 

713 level : int, level name, or sequence of int/level names (default None) 

714 Level(s) to set (None for all levels). 

715 inplace : bool 

716 If True, mutates in place. 

717 verify_integrity : bool, default True 

718 If True, checks that levels and codes are compatible. 

719 

720 Returns 

721 ------- 

722 new index (of same type and class...etc) 

723 

724 Examples 

725 -------- 

726 >>> idx = pd.MultiIndex.from_tuples([(1, 'one'), (1, 'two'), 

727 (2, 'one'), (2, 'two'), 

728 (3, 'one'), (3, 'two')], 

729 names=['foo', 'bar']) 

730 >>> idx.set_levels([['a', 'b', 'c'], [1, 2]]) 

731 MultiIndex([('a', 1), 

732 ('a', 2), 

733 ('b', 1), 

734 ('b', 2), 

735 ('c', 1), 

736 ('c', 2)], 

737 names=['foo', 'bar']) 

738 >>> idx.set_levels(['a', 'b', 'c'], level=0) 

739 MultiIndex([('a', 'one'), 

740 ('a', 'two'), 

741 ('b', 'one'), 

742 ('b', 'two'), 

743 ('c', 'one'), 

744 ('c', 'two')], 

745 names=['foo', 'bar']) 

746 >>> idx.set_levels(['a', 'b'], level='bar') 

747 MultiIndex([(1, 'a'), 

748 (1, 'b'), 

749 (2, 'a'), 

750 (2, 'b'), 

751 (3, 'a'), 

752 (3, 'b')], 

753 names=['foo', 'bar']) 

754 

755 If any of the levels passed to ``set_levels()`` exceeds the 

756 existing length, all of the values from that argument will 

757 be stored in the MultiIndex levels, though the values will 

758 be truncated in the MultiIndex output. 

759 

760 >>> idx.set_levels([['a', 'b', 'c'], [1, 2, 3, 4]], level=[0, 1]) 

761 MultiIndex([('a', 1), 

762 ('a', 2), 

763 ('b', 1), 

764 ('b', 2)], 

765 names=['foo', 'bar']) 

766 >>> idx.set_levels([['a', 'b', 'c'], [1, 2, 3, 4]], level=[0, 1]).levels 

767 FrozenList([['a', 'b', 'c'], [1, 2, 3, 4]]) 

768 """ 

769 if is_list_like(levels) and not isinstance(levels, Index): 

770 levels = list(levels) 

771 

772 if level is not None and not is_list_like(level): 

773 if not is_list_like(levels): 

774 raise TypeError("Levels must be list-like") 

775 if is_list_like(levels[0]): 

776 raise TypeError("Levels must be list-like") 

777 level = [level] 

778 levels = [levels] 

779 elif level is None or is_list_like(level): 

780 if not is_list_like(levels) or not is_list_like(levels[0]): 

781 raise TypeError("Levels must be list of lists-like") 

782 

783 if inplace: 

784 idx = self 

785 else: 

786 idx = self._shallow_copy() 

787 idx._reset_identity() 

788 idx._set_levels( 

789 levels, level=level, validate=True, verify_integrity=verify_integrity 

790 ) 

791 if not inplace: 

792 return idx 

793 

    @property
    def codes(self):
        # integer codes per level, as set by _set_codes (a FrozenList)
        return self._codes

797 

    def _set_codes(
        self, codes, level=None, copy=False, validate=True, verify_integrity=False
    ):
        # Internal setter: mutates self in place.  ``validate`` checks the
        # shape of the input; ``verify_integrity`` checks codes vs levels.
        if validate:
            if level is None and len(codes) != self.nlevels:
                raise ValueError("Length of codes must match number of levels")
            if level is not None and len(codes) != len(level):
                raise ValueError("Length of codes must match length of levels.")

        if level is None:
            # replace all codes, coercing each to a dtype fitting its level
            # NOTE(review): the trailing ``.view()`` looks like a no-op
            # ndarray view — confirm whether it is intentional.
            new_codes = FrozenList(
                _coerce_indexer_frozen(level_codes, lev, copy=copy).view()
                for lev, level_codes in zip(self._levels, codes)
            )
        else:
            # replace only the requested levels' codes; keep the rest
            level_numbers = [self._get_level_number(lev) for lev in level]
            new_codes = list(self._codes)
            for lev_num, level_codes in zip(level_numbers, codes):
                lev = self.levels[lev_num]
                new_codes[lev_num] = _coerce_indexer_frozen(level_codes, lev, copy=copy)
            new_codes = FrozenList(new_codes)

        if verify_integrity:
            # may remap codes (e.g. -1 for codes pointing at NaN levels)
            new_codes = self._verify_integrity(codes=new_codes)

        self._codes = new_codes

        # invalidate the cached tuple representation and other caches
        self._tuples = None
        self._reset_cache()

827 

828 def set_codes(self, codes, level=None, inplace=False, verify_integrity=True): 

829 """ 

830 Set new codes on MultiIndex. Defaults to returning 

831 new index. 

832 

833 .. versionadded:: 0.24.0 

834 

835 New name for deprecated method `set_labels`. 

836 

837 Parameters 

838 ---------- 

839 codes : sequence or list of sequence 

840 New codes to apply. 

841 level : int, level name, or sequence of int/level names (default None) 

842 Level(s) to set (None for all levels). 

843 inplace : bool 

844 If True, mutates in place. 

845 verify_integrity : bool (default True) 

846 If True, checks that levels and codes are compatible. 

847 

848 Returns 

849 ------- 

850 new index (of same type and class...etc) 

851 

852 Examples 

853 -------- 

854 >>> idx = pd.MultiIndex.from_tuples([(1, 'one'), 

855 (1, 'two'), 

856 (2, 'one'), 

857 (2, 'two')], 

858 names=['foo', 'bar']) 

859 >>> idx.set_codes([[1, 0, 1, 0], [0, 0, 1, 1]]) 

860 MultiIndex([(2, 'one'), 

861 (1, 'one'), 

862 (2, 'two'), 

863 (1, 'two')], 

864 names=['foo', 'bar']) 

865 >>> idx.set_codes([1, 0, 1, 0], level=0) 

866 MultiIndex([(2, 'one'), 

867 (1, 'two'), 

868 (2, 'one'), 

869 (1, 'two')], 

870 names=['foo', 'bar']) 

871 >>> idx.set_codes([0, 0, 1, 1], level='bar') 

872 MultiIndex([(1, 'one'), 

873 (1, 'one'), 

874 (2, 'two'), 

875 (2, 'two')], 

876 names=['foo', 'bar']) 

877 >>> idx.set_codes([[1, 0, 1, 0], [0, 0, 1, 1]], level=[0, 1]) 

878 MultiIndex([(2, 'one'), 

879 (1, 'one'), 

880 (2, 'two'), 

881 (1, 'two')], 

882 names=['foo', 'bar']) 

883 """ 

884 if level is not None and not is_list_like(level): 

885 if not is_list_like(codes): 

886 raise TypeError("Codes must be list-like") 

887 if is_list_like(codes[0]): 

888 raise TypeError("Codes must be list-like") 

889 level = [level] 

890 codes = [codes] 

891 elif level is None or is_list_like(level): 

892 if not is_list_like(codes) or not is_list_like(codes[0]): 

893 raise TypeError("Codes must be list of lists-like") 

894 

895 if inplace: 

896 idx = self 

897 else: 

898 idx = self._shallow_copy() 

899 idx._reset_identity() 

900 idx._set_codes(codes, level=level, verify_integrity=verify_integrity) 

901 if not inplace: 

902 return idx 

903 

    def copy(
        self,
        names=None,
        dtype=None,
        levels=None,
        codes=None,
        deep=False,
        _set_identity=False,
        **kwargs,
    ):
        """
        Make a copy of this object. Names, dtype, levels and codes can be
        passed and will be set on new copy.

        Parameters
        ----------
        names : sequence, optional
        dtype : numpy dtype or pandas type, optional
        levels : sequence, optional
        codes : sequence, optional

        Returns
        -------
        copy : MultiIndex

        Notes
        -----
        In most cases, there should be no functional difference from using
        ``deep``, but if ``deep`` is passed it will attempt to deepcopy.
        This could be potentially expensive on large MultiIndex objects.
        """
        # NOTE(review): ``dtype`` is accepted here but never used below, and
        # unknown **kwargs other than "name"/"labels" are silently ignored —
        # confirm whether that is intended.
        name = kwargs.get("name")
        names = self._validate_names(name=name, names=names, deep=deep)
        if "labels" in kwargs:
            raise TypeError("'labels' argument has been removed; use 'codes' instead")
        if deep:
            from copy import deepcopy

            # only deep-copy the components the caller did not supply
            if levels is None:
                levels = deepcopy(self.levels)
            if codes is None:
                codes = deepcopy(self.codes)
        else:
            if levels is None:
                levels = self.levels
            if codes is None:
                codes = self.codes
        # integrity was already verified on self, so skip re-verification
        return MultiIndex(
            levels=levels,
            codes=codes,
            names=names,
            sortorder=self.sortorder,
            verify_integrity=False,
            _set_identity=_set_identity,
        )

959 

    def __array__(self, dtype=None) -> np.ndarray:
        """ the array interface, return my values """
        # NOTE: ``dtype`` is part of the protocol signature but unused here
        return self.values

963 

964 def view(self, cls=None): 

965 """ this is defined as a copy with the same identity """ 

966 result = self.copy() 

967 result._id = self._id 

968 return result 

969 

970 def _shallow_copy_with_infer(self, values, **kwargs): 

971 # On equal MultiIndexes the difference is empty. 

972 # Therefore, an empty MultiIndex is returned GH13490 

973 if len(values) == 0: 

974 return MultiIndex( 

975 levels=[[] for _ in range(self.nlevels)], 

976 codes=[[] for _ in range(self.nlevels)], 

977 **kwargs, 

978 ) 

979 return self._shallow_copy(values, **kwargs) 

980 

981 @Appender(_index_shared_docs["contains"] % _index_doc_kwargs) 

982 def __contains__(self, key) -> bool: 

983 hash(key) 

984 try: 

985 self.get_loc(key) 

986 return True 

987 except (LookupError, TypeError, ValueError): 

988 return False 

989 

990 @Appender(_index_shared_docs["_shallow_copy"]) 

991 def _shallow_copy(self, values=None, **kwargs): 

992 if values is not None: 

993 names = kwargs.pop("names", kwargs.pop("name", self.names)) 

994 # discards freq 

995 kwargs.pop("freq", None) 

996 return MultiIndex.from_tuples(values, names=names, **kwargs) 

997 return self.copy(**kwargs) 

998 

    @cache_readonly
    def dtype(self) -> np.dtype:
        # a MultiIndex materializes as tuples, hence always object dtype
        return np.dtype("O")

1002 

1003 def _is_memory_usage_qualified(self) -> bool: 

1004 """ return a boolean if we need a qualified .info display """ 

1005 

1006 def f(l): 

1007 return "mixed" in l or "string" in l or "unicode" in l 

1008 

1009 return any(f(l) for l in self._inferred_type_levels) 

1010 

    @Appender(Index.memory_usage.__doc__)
    def memory_usage(self, deep: bool = False) -> int:
        # we are overwriting our base class to avoid
        # computing .values here which could materialize
        # a tuple representation unnecessarily
        return self._nbytes(deep)

1017 

@cache_readonly
def nbytes(self) -> int:
    """ return the number of bytes in the underlying data """
    # Shallow accounting only; see _nbytes for the deep variant.
    return self._nbytes(False)

1022 

def _nbytes(self, deep: bool = False) -> int:
    """
    return the number of bytes in the underlying data
    deeply introspect the level data if deep=True

    include the engine hashtable

    *this is an internal routine*

    """

    # for implementations with no useful getsizeof (PyPy)
    objsize = 24

    # Sum the three components: level arrays, code arrays, and names.
    level_nbytes = sum(i.memory_usage(deep=deep) for i in self.levels)
    label_nbytes = sum(i.nbytes for i in self.codes)
    names_nbytes = sum(getsizeof(i, objsize) for i in self.names)
    result = level_nbytes + label_nbytes + names_nbytes

    # include our engine hashtable
    result += self._engine.sizeof(deep=deep)
    return result

1045 

1046 # -------------------------------------------------------------------- 

1047 # Rendering Methods 

def _formatter_func(self, tup):
    """
    Formats each item in tup according to its level's formatter function.
    """
    per_level_fmt = (level._formatter_func for level in self.levels)
    return tuple(fmt(value) for fmt, value in zip(per_level_fmt, tup))

1054 

def _format_data(self, name=None):
    """
    Return the formatted data as a unicode string
    """
    # line_break_each_value=True renders one tuple per line in the repr.
    return format_object_summary(
        self, self._formatter_func, name=name, line_break_each_value=True
    )

1062 

def _format_attrs(self):
    """
    Return a list of tuples of the (attr,formatted_value).
    """
    # dtype is always object for MultiIndex, so it is omitted from the repr.
    return format_object_attrs(self, include_dtype=False)

1068 

def _format_native_types(self, na_rep="nan", **kwargs):
    """Format levels natively and re-map NA codes to an explicit na_rep slot."""
    new_levels = []
    new_codes = []

    # go through the levels and format them
    for level, level_codes in zip(self.levels, self.codes):
        level = level._format_native_types(na_rep=na_rep, **kwargs)
        # add nan values, if there are any
        mask = level_codes == -1
        if mask.any():
            # Append na_rep as an extra level entry and point the -1 codes at it.
            nan_index = len(level)
            level = np.append(level, na_rep)
            assert not level_codes.flags.writeable  # i.e. copy is needed
            level_codes = level_codes.copy()  # make writeable
            level_codes[mask] = nan_index
        new_levels.append(level)
        new_codes.append(level_codes)

    if len(new_levels) == 1:
        # a single-level multi-index
        return Index(new_levels[0].take(new_codes[0]))._format_native_types()
    else:
        # reconstruct the multi-index
        mi = MultiIndex(
            levels=new_levels,
            codes=new_codes,
            names=self.names,
            sortorder=self.sortorder,
            verify_integrity=False,
        )
        return mi.values

1100 

def format(
    self,
    space=2,
    sparsify=None,
    adjoin=True,
    names=False,
    na_rep=None,
    formatter=None,
):
    """
    Render the MultiIndex as a list of formatted strings (or per-level
    string lists when ``adjoin=False``).

    Parameters
    ----------
    space : int, default 2
        Spacing between adjoined level columns.
    sparsify : bool or sentinel, default None
        None -> use "display.multi_sparse" option; a non-True truthy value
        is used as the sparsify sentinel itself (GH3547).
    adjoin : bool, default True
        Join the level columns into single strings.
    names : bool, default False
        Prepend level names as a header row.
    na_rep : str, optional
        NA representation; defaults per level dtype via _get_na_rep.
    formatter : callable, optional
        Per-value formatter passed to each level's ``format``.
    """
    if len(self) == 0:
        return []

    stringified_levels = []
    for lev, level_codes in zip(self.levels, self.codes):
        na = na_rep if na_rep is not None else _get_na_rep(lev.dtype.type)

        if len(lev) > 0:

            formatted = lev.take(level_codes).format(formatter=formatter)

            # we have some NA
            mask = level_codes == -1
            if mask.any():
                formatted = np.array(formatted, dtype=object)
                formatted[mask] = na
                formatted = formatted.tolist()

        else:
            # weird all NA case
            formatted = [
                pprint_thing(na if isna(x) else x, escape_chars=("\t", "\r", "\n"))
                for x in algos.take_1d(lev._values, level_codes)
            ]
        stringified_levels.append(formatted)

    result_levels = []
    for lev, name in zip(stringified_levels, self.names):
        level = []

        if names:
            # Header cell: the level name (or empty string when unnamed).
            level.append(
                pprint_thing(name, escape_chars=("\t", "\r", "\n"))
                if name is not None
                else ""
            )

        level.extend(np.array(lev, dtype=object))
        result_levels.append(level)

    if sparsify is None:
        sparsify = get_option("display.multi_sparse")

    if sparsify:
        sentinel = ""
        # GH3547
        # use value of sparsify as sentinel, unless it's an obvious
        # "Truthy" value
        if sparsify not in [True, 1]:
            sentinel = sparsify
        # little bit of a kludge job for #1217
        result_levels = _sparsify(
            result_levels, start=int(names), sentinel=sentinel
        )

    if adjoin:
        from pandas.io.formats.format import _get_adjustment

        adj = _get_adjustment()
        return adj.adjoin(space, *result_levels).split("\n")
    else:
        return result_levels

1172 

1173 # -------------------------------------------------------------------- 

1174 

def __len__(self) -> int:
    # All code arrays have equal length; the first level's codes suffice.
    return len(self.codes[0])

1177 

def _get_names(self):
    # Wrap in FrozenList so callers cannot mutate names in place.
    return FrozenList(self._names)

1180 

def _set_names(self, names, level=None, validate=True):
    """
    Set new names on index. Each name has to be a hashable type.

    Parameters
    ----------
    values : str or sequence
        name(s) to set
    level : int, level name, or sequence of int/level names (default None)
        If the index is a MultiIndex (hierarchical), level(s) to set (None
        for all levels). Otherwise level must be None
    validate : boolean, default True
        validate that the names match level lengths

    Raises
    ------
    TypeError if each name is not hashable.

    Notes
    -----
    sets names on levels. WARNING: mutates!

    Note that you generally want to set this *after* changing levels, so
    that it only acts on copies
    """
    # GH 15110
    # Don't allow a single string for names in a MultiIndex
    if names is not None and not is_list_like(names):
        raise ValueError("Names should be list-like for a MultiIndex")
    names = list(names)

    if validate:
        if level is not None and len(names) != len(level):
            raise ValueError("Length of names must match length of level.")
        if level is None and len(names) != self.nlevels:
            raise ValueError(
                "Length of names must match number of levels in MultiIndex."
            )

    # Normalize ``level`` to a list of level positions.
    if level is None:
        level = range(self.nlevels)
    else:
        level = [self._get_level_number(lev) for lev in level]

    # set the name
    for lev, name in zip(level, names):
        if name is not None:
            # GH 20527
            # All items in 'names' need to be hashable:
            if not is_hashable(name):
                raise TypeError(
                    f"{type(self).__name__}.name must be a hashable type"
                )
        self._names[lev] = name

    # If .levels has been accessed, the names in our cache will be stale.
    self._reset_cache()

1238 

# Route attribute access through the accessors above so that assignment
# (``mi.names = [...]``) runs validation and cache invalidation.
names = property(
    fset=_set_names, fget=_get_names, doc="""\nNames of levels in MultiIndex.\n"""
)

1242 

@Appender(_index_shared_docs["_get_grouper_for_level"])
def _get_grouper_for_level(self, mapper, level):
    indexer = self.codes[level]
    level_index = self.levels[level]

    if mapper is not None:
        # Handle group mapping function and return
        level_values = self.levels[level].take(indexer)
        grouper = level_values.map(mapper)
        return grouper, None, None

    codes, uniques = algos.factorize(indexer, sort=True)

    if len(uniques) > 0 and uniques[0] == -1:
        # Handle NAs: refactorize excluding -1, then reinsert -1 positions.
        mask = indexer != -1
        ok_codes, uniques = algos.factorize(indexer[mask], sort=True)

        codes = np.empty(len(indexer), dtype=indexer.dtype)
        codes[mask] = ok_codes
        codes[~mask] = -1

    if len(uniques) < len(level_index):
        # Remove unobserved levels from level_index
        level_index = level_index.take(uniques)
    else:
        # break references back to us so that setting the name
        # on the output of a groupby doesn't reflect back here.
        level_index = level_index.copy()

    if level_index._can_hold_na:
        grouper = level_index.take(codes, fill_value=True)
    else:
        grouper = level_index.take(codes)

    return grouper, codes, level_index

1279 

@property
def _constructor(self):
    # Tuple-based construction is the natural constructor from flat values.
    return MultiIndex.from_tuples

1283 

@cache_readonly
def inferred_type(self) -> str:
    # A MultiIndex holds tuples, which are always reported as "mixed".
    return "mixed"

1287 

def _get_level_number(self, level) -> int:
    """
    Resolve ``level`` (a name or an integer, possibly negative) to a
    zero-based level position.

    Raises
    ------
    ValueError : ambiguous duplicated name
    KeyError : unknown level name
    IndexError : integer out of bounds
    """
    count = self.names.count(level)
    if (count > 1) and not is_integer(level):
        raise ValueError(
            f"The name {level} occurs multiple times, use a level number"
        )
    try:
        # Try name lookup first; an integer not used as a name falls through.
        level = self.names.index(level)
    except ValueError:
        if not is_integer(level):
            raise KeyError(f"Level {level} not found")
        elif level < 0:
            level += self.nlevels
            if level < 0:
                orig_level = level - self.nlevels
                raise IndexError(
                    f"Too many levels: Index has only {self.nlevels} levels,"
                    f" {orig_level} is not a valid level number"
                )
        # Note: levels are zero-based
        elif level >= self.nlevels:
            raise IndexError(
                f"Too many levels: Index has only {self.nlevels} levels, "
                f"not {level + 1}"
            )
    return level

1314 

# Lazy cache for the materialized tuple representation (see ``values``).
_tuples = None

1316 

@cache_readonly
def _engine(self):
    # Calculate the number of bits needed to represent labels in each
    # level, as log2 of their sizes (including -1 for NaN):
    sizes = np.ceil(np.log2([len(l) + 1 for l in self.levels]))

    # Sum bit counts, starting from the _right_....
    lev_bits = np.cumsum(sizes[::-1])[::-1]

    # ... in order to obtain offsets such that sorting the combination of
    # shifted codes (one for each level, resulting in a unique integer) is
    # equivalent to sorting lexicographically the codes themselves. Notice
    # that each level needs to be shifted by the number of bits needed to
    # represent the _previous_ ones:
    offsets = np.concatenate([lev_bits[1:], [0]]).astype("uint64")

    # Check the total number of bits needed for our representation:
    if lev_bits[0] > 64:
        # The levels would overflow a 64 bit uint - use Python integers:
        return MultiIndexPyIntEngine(self.levels, self.codes, offsets)
    return MultiIndexUIntEngine(self.levels, self.codes, offsets)

1338 

@property
def values(self):
    """Materialize (and cache in ``_tuples``) an object ndarray of tuples."""
    if self._tuples is not None:
        return self._tuples

    values = []

    for i in range(self.nlevels):
        vals = self._get_level_values(i)
        if is_categorical_dtype(vals):
            vals = vals._internal_get_values()
        # Box extension / datetime-like values as objects before zipping.
        if isinstance(vals.dtype, ExtensionDtype) or hasattr(vals, "_box_values"):
            vals = vals.astype(object)
        vals = np.array(vals, copy=False)
        values.append(vals)

    self._tuples = lib.fast_zip(values)
    return self._tuples

1357 

@property
def _has_complex_internals(self):
    # used to avoid libreduction code paths, which raise or require conversion
    return True

1362 

@cache_readonly
def is_monotonic_increasing(self) -> bool:
    """
    return if the index is monotonic increasing (only equal or
    increasing) values.
    """

    if all(x.is_monotonic for x in self.levels):
        # If each level is sorted, we can operate on the codes directly. GH27495
        return libalgos.is_lexsorted(
            [x.astype("int64", copy=False) for x in self.codes]
        )

    # reversed() because lexsort() wants the most significant key last.
    values = [
        self._get_level_values(i).values for i in reversed(range(len(self.levels)))
    ]
    try:
        sort_order = np.lexsort(values)
        # Monotonic iff lexsort leaves the order unchanged.
        return Index(sort_order).is_monotonic
    except TypeError:

        # we have mixed types and np.lexsort is not happy
        return Index(self.values).is_monotonic

1387 

@cache_readonly
def is_monotonic_decreasing(self) -> bool:
    """
    return if the index is monotonic decreasing (only equal or
    decreasing) values.
    """
    # monotonic decreasing if and only if reverse is monotonic increasing
    return self[::-1].is_monotonic_increasing

1396 

@cache_readonly
def _have_mixed_levels(self):
    """Boolean list: True for each level whose inferred type is mixed."""
    return ["mixed" in inferred for inferred in self._inferred_type_levels]

1401 

@cache_readonly
def _inferred_type_levels(self):
    """List of the inferred type string of each level."""
    return [level.inferred_type for level in self.levels]

1406 

@cache_readonly
def _hashed_values(self):
    """ return a uint64 ndarray of my hashed values """
    return hash_tuples(self)

1411 

def _hashed_indexing_key(self, key):
    """
    validate and return the hash for the provided key

    *this is internal for use for the cython routines*

    Parameters
    ----------
    key : string or tuple

    Returns
    -------
    np.uint64

    Notes
    -----
    we need to stringify if we have mixed levels
    """

    if not isinstance(key, tuple):
        return hash_tuples(key)

    # A tuple key must supply one element per level.
    if not len(key) == self.nlevels:
        raise KeyError

    def f(k, stringify):
        # Stringify components that index into mixed-type levels so the
        # hash matches how those level values were hashed.
        if stringify and not isinstance(k, str):
            k = str(k)
        return k

    key = tuple(
        f(k, stringify) for k, stringify in zip(key, self._have_mixed_levels)
    )
    return hash_tuple(key)

1446 

@Appender(Index.duplicated.__doc__)
def duplicated(self, keep="first"):
    # Encode each row as a single group id, then dedupe the int64 ids.
    shape = map(len, self.levels)
    ids = get_group_index(self.codes, shape, sort=False, xnull=False)

    return duplicated_int64(ids, keep)

1453 

def fillna(self, value=None, downcast=None):
    """
    fillna is not implemented for MultiIndex.

    Parameters
    ----------
    value, downcast
        Accepted for Index signature compatibility; unused.

    Raises
    ------
    NotImplementedError
        Always.
    """
    # The previous message said "isna is not defined ..." — a copy/paste
    # error; name the method that actually raised.
    raise NotImplementedError("fillna is not defined for MultiIndex")

1459 

@Appender(_index_shared_docs["dropna"])
def dropna(self, how="any"):
    # A row is NA in a level when its code is -1.
    nans = [level_codes == -1 for level_codes in self.codes]
    if how == "any":
        indexer = np.any(nans, axis=0)
    elif how == "all":
        indexer = np.all(nans, axis=0)
    else:
        raise ValueError(f"invalid how option: {how}")

    new_codes = [level_codes[~indexer] for level_codes in self.codes]
    return self.copy(codes=new_codes, deep=True)

1472 

def get_value(self, series, key):
    """
    Label-based scalar/slice lookup of ``key`` in ``series``.

    Tries, in order: the engine's direct lookup, a MultiIndex label
    lookup (possibly dropping levels), positional fallback, and finally a
    Timestamp re-parse for datetime-like keys.
    """
    # Label-based
    s = com.values_from_object(series)
    k = com.values_from_object(key)

    def _try_mi(k):
        # TODO: what if a level contains tuples??
        loc = self.get_loc(k)
        new_values = series._values[loc]
        new_index = self[loc]
        new_index = maybe_droplevels(new_index, k)
        return series._constructor(
            new_values, index=new_index, name=series.name
        ).__finalize__(self)

    try:
        return self._engine.get_value(s, k)
    except KeyError as e1:
        # Fall back to label-based MultiIndex lookup.
        try:
            return _try_mi(key)
        except KeyError:
            pass

        try:
            return libindex.get_value_at(s, k)
        except IndexError:
            raise
        except TypeError:
            # generator/iterator-like
            if is_iterator(key):
                raise InvalidIndexError(key)
            else:
                raise e1
        except Exception:  # pragma: no cover
            raise e1
    except TypeError:

        # a Timestamp will raise a TypeError in a multi-index
        # rather than a KeyError, try it here
        # note that a string that 'looks' like a Timestamp will raise
        # a KeyError! (GH5725)
        if isinstance(key, (datetime.datetime, np.datetime64, str)):
            try:
                return _try_mi(key)
            except KeyError:
                raise
            except (IndexError, ValueError, TypeError):
                pass

            # Last resort: interpret the key as a Timestamp.
            try:
                return _try_mi(Timestamp(key))
            except (
                KeyError,
                TypeError,
                IndexError,
                ValueError,
                tslibs.OutOfBoundsDatetime,
            ):
                pass

        raise InvalidIndexError(key)

1534 

def _get_level_values(self, level, unique=False):
    """
    Return vector of label values for requested level,
    equal to the length of the index

    **this is an internal method**

    Parameters
    ----------
    level : int level
    unique : bool, default False
        if True, drop duplicated values

    Returns
    -------
    values : ndarray
    """

    lev = self.levels[level]
    level_codes = self.codes[level]
    name = self._names[level]
    if unique:
        level_codes = algos.unique(level_codes)
    # take_1d maps -1 codes to the level's NA value.
    filled = algos.take_1d(lev._values, level_codes, fill_value=lev._na_value)
    return lev._shallow_copy(filled, name=name)

1560 

def get_level_values(self, level):
    """
    Return vector of label values for requested level,
    equal to the length of the index.

    Parameters
    ----------
    level : int or str
        ``level`` is either the integer position of the level in the
        MultiIndex, or the name of the level.

    Returns
    -------
    values : Index
        Values is a level of this MultiIndex converted to
        a single :class:`Index` (or subclass thereof).

    Examples
    --------

    Create a MultiIndex:

    >>> mi = pd.MultiIndex.from_arrays((list('abc'), list('def')))
    >>> mi.names = ['level_1', 'level_2']

    Get level values by supplying level as either integer or name:

    >>> mi.get_level_values(0)
    Index(['a', 'b', 'c'], dtype='object', name='level_1')
    >>> mi.get_level_values('level_2')
    Index(['d', 'e', 'f'], dtype='object', name='level_2')
    """
    # Resolve a name or integer to a level position, then delegate.
    position = self._get_level_number(level)
    return self._get_level_values(position)

1596 

@Appender(_index_shared_docs["index_unique"] % _index_doc_kwargs)
def unique(self, level=None):

    if level is None:
        # Deduplicate whole tuples via the base implementation.
        return super().unique()
    else:
        level = self._get_level_number(level)
        return self._get_level_values(level=level, unique=True)

1605 

def _to_safe_for_reshape(self):
    """ convert to object if we are a categorical """
    return self.set_levels([i._to_safe_for_reshape() for i in self.levels])

1609 

def to_frame(self, index=True, name=None):
    """
    Create a DataFrame with the levels of the MultiIndex as columns.

    Column ordering is determined by the DataFrame constructor with data as
    a dict.

    .. versionadded:: 0.24.0

    Parameters
    ----------
    index : bool, default True
        Set the index of the returned DataFrame as the original MultiIndex.

    name : list / sequence of strings, optional
        The passed names should substitute index level names.

    Returns
    -------
    DataFrame : a DataFrame containing the original MultiIndex data.

    See Also
    --------
    DataFrame
    """

    from pandas import DataFrame

    if name is not None:
        if not is_list_like(name):
            raise TypeError("'name' must be a list / sequence of column names.")

        if len(name) != len(self.levels):
            raise ValueError(
                "'name' should have same length as number of levels on index."
            )
        idx_names = name
    else:
        idx_names = self.names

    # Guarantee resulting column order - PY36+ dict maintains insertion order
    # Unnamed levels fall back to their integer position as the column label.
    result = DataFrame(
        {
            (level if lvlname is None else lvlname): self._get_level_values(level)
            for lvlname, level in zip(idx_names, range(len(self.levels)))
        },
        copy=False,
    )

    if index:
        result.index = self
    return result

1662 

def to_flat_index(self):
    """
    Convert a MultiIndex to an Index of Tuples containing the level values.

    .. versionadded:: 0.24.0

    Returns
    -------
    pd.Index
        Index with the MultiIndex data represented in Tuples.

    Notes
    -----
    This method will simply return the caller if called by anything other
    than a MultiIndex.

    Examples
    --------
    >>> index = pd.MultiIndex.from_product(
    ...     [['foo', 'bar'], ['baz', 'qux']],
    ...     names=['a', 'b'])
    >>> index.to_flat_index()
    Index([('foo', 'baz'), ('foo', 'qux'),
           ('bar', 'baz'), ('bar', 'qux')],
          dtype='object')
    """
    # tupleize_cols=False prevents round-tripping back into a MultiIndex.
    return Index(self.values, tupleize_cols=False)

1690 

@property
def is_all_dates(self) -> bool:
    # Tuples are never datelike, regardless of level dtypes.
    return False

1694 

def is_lexsorted(self) -> bool:
    """
    Return True if the codes are lexicographically sorted.

    Returns
    -------
    bool
    """
    # Fully lexsorted means every level participates in the sort depth.
    depth = self.lexsort_depth
    return depth == self.nlevels

1704 

@cache_readonly
def lexsort_depth(self):
    # A known sortorder short-circuits the O(n * nlevels) computation.
    if self.sortorder is not None:
        return self.sortorder

    return self._lexsort_depth()

1711 

def _lexsort_depth(self) -> int:
    """
    Compute and return the lexsort_depth, the number of levels of the
    MultiIndex that are sorted lexically

    Returns
    -------
    int
    """
    int64_codes = [ensure_int64(level_codes) for level_codes in self.codes]
    # Find the largest prefix of levels that is lexsorted.
    for k in range(self.nlevels, 0, -1):
        if libalgos.is_lexsorted(int64_codes[:k]):
            return k
    return 0

1726 

def _sort_levels_monotonic(self):
    """
    This is an *internal* function.

    Create a new MultiIndex from the current to monotonically sorted
    items IN the levels. This does not actually make the entire MultiIndex
    monotonic, JUST the levels.

    The resulting MultiIndex will have the same outward
    appearance, meaning the same .values and ordering. It will also
    be .equals() to the original.

    Returns
    -------
    MultiIndex

    Examples
    --------

    >>> mi = pd.MultiIndex(levels=[['a', 'b'], ['bb', 'aa']],
    ...                    codes=[[0, 0, 1, 1], [0, 1, 0, 1]])
    >>> mi
    MultiIndex([('a', 'bb'),
                ('a', 'aa'),
                ('b', 'bb'),
                ('b', 'aa')],
               )

    >>> mi.sort_values()
    MultiIndex([('a', 'aa'),
                ('a', 'bb'),
                ('b', 'aa'),
                ('b', 'bb')],
               )
    """

    if self.is_lexsorted() and self.is_monotonic:
        return self

    new_levels = []
    new_codes = []

    for lev, level_codes in zip(self.levels, self.codes):

        if not lev.is_monotonic:
            try:
                # indexer to reorder the levels
                indexer = lev.argsort()
            except TypeError:
                # Unorderable level values: leave this level untouched.
                pass
            else:
                lev = lev.take(indexer)

                # indexer to reorder the level codes
                indexer = ensure_int64(indexer)
                ri = lib.get_reverse_indexer(indexer, len(indexer))
                level_codes = algos.take_1d(ri, level_codes)

        new_levels.append(lev)
        new_codes.append(level_codes)

    return MultiIndex(
        new_levels,
        new_codes,
        names=self.names,
        sortorder=self.sortorder,
        verify_integrity=False,
    )

1795 

def remove_unused_levels(self):
    """
    Create a new MultiIndex from the current that removes
    unused levels, meaning that they are not expressed in the labels.

    The resulting MultiIndex will have the same outward
    appearance, meaning the same .values and ordering. It will also
    be .equals() to the original.

    Returns
    -------
    MultiIndex

    Examples
    --------
    >>> mi = pd.MultiIndex.from_product([range(2), list('ab')])
    >>> mi
    MultiIndex([(0, 'a'),
                (0, 'b'),
                (1, 'a'),
                (1, 'b')],
               )

    >>> mi[2:]
    MultiIndex([(1, 'a'),
                (1, 'b')],
               )

    The 0 from the first level is not represented
    and can be removed

    >>> mi2 = mi[2:].remove_unused_levels()
    >>> mi2.levels
    FrozenList([[1], ['a', 'b']])
    """

    new_levels = []
    new_codes = []

    changed = False
    for lev, level_codes in zip(self.levels, self.codes):

        # Since few levels are typically unused, bincount() is more
        # efficient than unique() - however it only accepts positive values
        # (and drops order):
        uniques = np.where(np.bincount(level_codes + 1) > 0)[0] - 1
        has_na = int(len(uniques) and (uniques[0] == -1))

        if len(uniques) != len(lev) + has_na:
            # We have unused levels
            changed = True

            # Recalculate uniques, now preserving order.
            # Can easily be cythonized by exploiting the already existing
            # "uniques" and stop parsing "level_codes" when all items
            # are found:
            uniques = algos.unique(level_codes)
            if has_na:
                na_idx = np.where(uniques == -1)[0]
                # Just ensure that -1 is in first position:
                uniques[[0, na_idx[0]]] = uniques[[na_idx[0], 0]]

            # codes get mapped from uniques to 0:len(uniques)
            # -1 (if present) is mapped to last position
            code_mapping = np.zeros(len(lev) + has_na)
            # ... and reassigned value -1:
            code_mapping[uniques] = np.arange(len(uniques)) - has_na

            level_codes = code_mapping[level_codes]

            # new levels are simple
            lev = lev.take(uniques[has_na:])

        new_levels.append(lev)
        new_codes.append(level_codes)

    result = self.view()

    if changed:
        # A new identity because the levels/codes no longer match self's.
        result._reset_identity()
        result._set_levels(new_levels, validate=False)
        result._set_codes(new_codes, validate=False)

    return result

1880 

@property
def nlevels(self) -> int:
    """
    Integer number of levels in this MultiIndex.
    """
    return len(self._levels)

1887 

@property
def levshape(self):
    """
    A tuple with the length of each level.
    """
    return tuple(map(len, self.levels))

1894 

def __reduce__(self):
    """Necessary for making this object picklable"""
    # Reconstruct via ibase._new_Index with the four defining attributes.
    d = dict(
        levels=list(self.levels),
        codes=list(self.codes),
        sortorder=self.sortorder,
        names=list(self.names),
    )
    return ibase._new_Index, (type(self), d), None

1904 

def __setstate__(self, state):
    """Necessary for making this object picklable"""

    # dict state: current pickle format; tuple state: legacy pickles.
    if isinstance(state, dict):
        levels = state.get("levels")
        codes = state.get("codes")
        sortorder = state.get("sortorder")
        names = state.get("names")

    elif isinstance(state, tuple):

        nd_state, own_state = state
        levels, codes, sortorder, names = own_state

    self._set_levels([Index(x) for x in levels], validate=False)
    self._set_codes(codes)
    # Re-validate codes against levels (may coerce dtype).
    new_codes = self._verify_integrity()
    self._set_codes(new_codes)
    self._set_names(names)
    self.sortorder = sortorder
    self._reset_identity()

1926 

def __getitem__(self, key):
    """Scalar key -> a tuple of level values; array-like key -> MultiIndex."""
    if is_scalar(key):
        key = com.cast_scalar_indexer(key)

        retval = []
        for lev, level_codes in zip(self.levels, self.codes):
            if level_codes[key] == -1:
                # -1 code marks a missing value in that level.
                retval.append(np.nan)
            else:
                retval.append(lev[level_codes[key]])

        return tuple(retval)
    else:
        if com.is_bool_indexer(key):
            key = np.asarray(key, dtype=bool)
            # A boolean mask preserves relative order, so sortorder survives.
            sortorder = self.sortorder
        else:
            # cannot be sure whether the result will be sorted
            sortorder = None

        if isinstance(key, Index):
            key = np.asarray(key)

        new_codes = [level_codes[key] for level_codes in self.codes]

        return MultiIndex(
            levels=self.levels,
            codes=new_codes,
            names=self.names,
            sortorder=sortorder,
            verify_integrity=False,
        )

1959 

@Appender(_index_shared_docs["take"] % _index_doc_kwargs)
def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs):
    nv.validate_take(tuple(), kwargs)
    indices = ensure_platform_int(indices)
    # Take is performed on the codes; -1 is the NA sentinel in codes.
    taken = self._assert_take_fillable(
        self.codes,
        indices,
        allow_fill=allow_fill,
        fill_value=fill_value,
        na_value=-1,
    )
    return MultiIndex(
        levels=self.levels, codes=taken, names=self.names, verify_integrity=False
    )

1974 

def _assert_take_fillable(
    self, values, indices, allow_fill=True, fill_value=None, na_value=None
):
    """ Internal method to handle NA filling of take """
    # NOTE(review): the ``values`` parameter is never used below — both
    # branches take from ``self.codes`` directly. Confirm whether callers
    # rely on that before changing.
    # only fill if we are passing a non-None fill_value
    if allow_fill and fill_value is not None:
        if (indices < -1).any():
            msg = (
                "When allow_fill=True and fill_value is not None, "
                "all indices must be >= -1"
            )
            raise ValueError(msg)
        taken = [lab.take(indices) for lab in self.codes]
        mask = indices == -1
        if mask.any():
            # Replace positions taken with index -1 by the NA sentinel.
            masked = []
            for new_label in taken:
                label_values = new_label
                label_values[mask] = na_value
                masked.append(np.asarray(label_values))
            taken = masked
    else:
        taken = [lab.take(indices) for lab in self.codes]
    return taken

1999 

def append(self, other):
    """
    Append a collection of Index options together

    Parameters
    ----------
    other : Index or list/tuple of indices

    Returns
    -------
    appended : Index
    """
    if not isinstance(other, (list, tuple)):
        other = [other]

    if all(
        (isinstance(o, MultiIndex) and o.nlevels >= self.nlevels) for o in other
    ):
        # All MultiIndexes of compatible width: append level-by-level.
        arrays = []
        for i in range(self.nlevels):
            label = self._get_level_values(i)
            appended = [o._get_level_values(i) for o in other]
            arrays.append(label.append(appended))
        return MultiIndex.from_arrays(arrays, names=self.names)

    # Mixed input: fall back to concatenating tuple values.
    to_concat = (self.values,) + tuple(k._values for k in other)
    new_tuples = np.concatenate(to_concat)

    # if all(isinstance(x, MultiIndex) for x in other):
    try:
        return MultiIndex.from_tuples(new_tuples, names=self.names)
    except (TypeError, IndexError):
        # Not reconstructible as a MultiIndex; return a flat Index.
        return Index(new_tuples)

2033 

def argsort(self, *args, **kwargs):
    """Return the indices that would sort the index's tuple values."""
    tuple_values = self.values
    return tuple_values.argsort(*args, **kwargs)

2036 

@Appender(_index_shared_docs["repeat"] % _index_doc_kwargs)
def repeat(self, repeats, axis=None):
    nv.validate_repeat(tuple(), dict(axis=axis))
    repeats = ensure_platform_int(repeats)
    # Repeat codes only; levels are shared unchanged.
    return MultiIndex(
        levels=self.levels,
        codes=[
            level_codes.view(np.ndarray).astype(np.intp).repeat(repeats)
            for level_codes in self.codes
        ],
        names=self.names,
        sortorder=self.sortorder,
        verify_integrity=False,
    )

2051 

def where(self, cond, other=None):
    """Unsupported for MultiIndex; always raises NotImplementedError."""
    message = ".where is not supported for MultiIndex operations"
    raise NotImplementedError(message)

2054 

    def drop(self, codes, level=None, errors="raise"):
        """
        Make new MultiIndex with passed list of codes deleted

        Parameters
        ----------
        codes : array-like
            Must be a list of tuples
        level : int or level name, default None
        errors : str, default 'raise'
            If 'ignore', suppress KeyError for labels not found.

        Returns
        -------
        dropped : MultiIndex
        """
        if level is not None:
            # Dropping labels from a single level is handled separately.
            return self._drop_from_level(codes, level, errors)

        if not isinstance(codes, (np.ndarray, Index)):
            try:
                codes = com.index_labels_to_array(codes, dtype=object)
            except ValueError:
                # Leave `codes` as-is when it cannot be coerced.
                pass

        inds = []
        for level_codes in codes:
            try:
                loc = self.get_loc(level_codes)
                # get_loc returns either an integer, a slice, or a boolean
                # mask
                if isinstance(loc, int):
                    inds.append(loc)
                elif isinstance(loc, slice):
                    inds.extend(range(loc.start, loc.stop))
                elif com.is_bool_indexer(loc):
                    if self.lexsort_depth == 0:
                        warnings.warn(
                            "dropping on a non-lexsorted multi-index "
                            "without a level parameter may impact performance.",
                            PerformanceWarning,
                            stacklevel=3,
                        )
                    # Convert boolean mask to integer positions.
                    loc = loc.nonzero()[0]
                    inds.extend(loc)
                else:
                    msg = f"unsupported indexer of type {type(loc)}"
                    raise AssertionError(msg)
            except KeyError:
                if errors != "ignore":
                    raise

        # Delete all collected positions at once.
        return self.delete(inds)

2107 

2108 def _drop_from_level(self, codes, level, errors="raise"): 

2109 codes = com.index_labels_to_array(codes) 

2110 i = self._get_level_number(level) 

2111 index = self.levels[i] 

2112 values = index.get_indexer(codes) 

2113 

2114 mask = ~algos.isin(self.codes[i], values) 

2115 if mask.all() and errors != "ignore": 

2116 raise KeyError(f"labels {codes} not found in level") 

2117 

2118 return self[mask] 

2119 

2120 def swaplevel(self, i=-2, j=-1): 

2121 """ 

2122 Swap level i with level j. 

2123 

2124 Calling this method does not change the ordering of the values. 

2125 

2126 Parameters 

2127 ---------- 

2128 i : int, str, default -2 

2129 First level of index to be swapped. Can pass level name as string. 

2130 Type of parameters can be mixed. 

2131 j : int, str, default -1 

2132 Second level of index to be swapped. Can pass level name as string. 

2133 Type of parameters can be mixed. 

2134 

2135 Returns 

2136 ------- 

2137 MultiIndex 

2138 A new MultiIndex. 

2139 

2140 See Also 

2141 -------- 

2142 Series.swaplevel : Swap levels i and j in a MultiIndex. 

2143 Dataframe.swaplevel : Swap levels i and j in a MultiIndex on a 

2144 particular axis. 

2145 

2146 Examples 

2147 -------- 

2148 >>> mi = pd.MultiIndex(levels=[['a', 'b'], ['bb', 'aa']], 

2149 ... codes=[[0, 0, 1, 1], [0, 1, 0, 1]]) 

2150 >>> mi 

2151 MultiIndex([('a', 'bb'), 

2152 ('a', 'aa'), 

2153 ('b', 'bb'), 

2154 ('b', 'aa')], 

2155 ) 

2156 >>> mi.swaplevel(0, 1) 

2157 MultiIndex([('bb', 'a'), 

2158 ('aa', 'a'), 

2159 ('bb', 'b'), 

2160 ('aa', 'b')], 

2161 ) 

2162 """ 

2163 new_levels = list(self.levels) 

2164 new_codes = list(self.codes) 

2165 new_names = list(self.names) 

2166 

2167 i = self._get_level_number(i) 

2168 j = self._get_level_number(j) 

2169 

2170 new_levels[i], new_levels[j] = new_levels[j], new_levels[i] 

2171 new_codes[i], new_codes[j] = new_codes[j], new_codes[i] 

2172 new_names[i], new_names[j] = new_names[j], new_names[i] 

2173 

2174 return MultiIndex( 

2175 levels=new_levels, codes=new_codes, names=new_names, verify_integrity=False 

2176 ) 

2177 

2178 def reorder_levels(self, order): 

2179 """ 

2180 Rearrange levels using input order. May not drop or duplicate levels. 

2181 

2182 Parameters 

2183 ---------- 

2184 

2185 Returns 

2186 ------- 

2187 MultiIndex 

2188 """ 

2189 order = [self._get_level_number(i) for i in order] 

2190 if len(order) != self.nlevels: 

2191 raise AssertionError( 

2192 f"Length of order must be same as number of levels ({self.nlevels})," 

2193 f" got {len(order)}" 

2194 ) 

2195 new_levels = [self.levels[i] for i in order] 

2196 new_codes = [self.codes[i] for i in order] 

2197 new_names = [self.names[i] for i in order] 

2198 

2199 return MultiIndex( 

2200 levels=new_levels, codes=new_codes, names=new_names, verify_integrity=False 

2201 ) 

2202 

2203 def _get_codes_for_sorting(self): 

2204 """ 

2205 we categorizing our codes by using the 

2206 available categories (all, not just observed) 

2207 excluding any missing ones (-1); this is in preparation 

2208 for sorting, where we need to disambiguate that -1 is not 

2209 a valid valid 

2210 """ 

2211 

2212 def cats(level_codes): 

2213 return np.arange( 

2214 np.array(level_codes).max() + 1 if len(level_codes) else 0, 

2215 dtype=level_codes.dtype, 

2216 ) 

2217 

2218 return [ 

2219 Categorical.from_codes(level_codes, cats(level_codes), ordered=True) 

2220 for level_codes in self.codes 

2221 ] 

2222 

    def sortlevel(self, level=0, ascending=True, sort_remaining=True):
        """
        Sort MultiIndex at the requested level. The result will respect the
        original ordering of the associated factor at that level.

        Parameters
        ----------
        level : list-like, int or str, default 0
            If a string is given, must be a name of the level.
            If list-like must be names or ints of levels.
        ascending : bool, default True
            False to sort in descending order.
            Can also be a list to specify a directed ordering.
        sort_remaining : sort by the remaining levels after level

        Returns
        -------
        sorted_index : pd.MultiIndex
            Resulting index.
        indexer : np.ndarray
            Indices of output values in original index.
        """
        if isinstance(level, (str, int)):
            level = [level]
        level = [self._get_level_number(lev) for lev in level]
        sortorder = None

        # we have a directed ordering via ascending
        if isinstance(ascending, list):
            if not len(level) == len(ascending):
                raise ValueError("level must have same length as ascending")

            # Per-level ascending/descending: lexicographic sort of the codes.
            indexer = lexsort_indexer(
                [self.codes[lev] for lev in level], orders=ascending
            )

        # level ordering
        else:

            codes = list(self.codes)
            shape = list(self.levshape)

            # partition codes and shape
            primary = tuple(codes[lev] for lev in level)
            primshp = tuple(shape[lev] for lev in level)

            # Reverse sorted to retain the order of
            # smaller indices that needs to be removed
            for lev in sorted(level, reverse=True):
                codes.pop(lev)
                shape.pop(lev)

            if sort_remaining:
                # NOTE(review): `primary` is deliberately doubled before the
                # remaining levels are appended — the repeated keys do not
                # change the ordering; matches upstream behavior.
                primary += primary + tuple(codes)
                primshp += primshp + tuple(shape)
            else:
                # Only sorted up to the first requested level.
                sortorder = level[0]

            indexer = indexer_from_factorized(primary, primshp, compress=False)

            if not ascending:
                indexer = indexer[::-1]

        indexer = ensure_platform_int(indexer)
        # Reorder every level's codes; the levels themselves are unchanged.
        new_codes = [level_codes.take(indexer) for level_codes in self.codes]

        new_index = MultiIndex(
            codes=new_codes,
            levels=self.levels,
            names=self.names,
            sortorder=sortorder,
            verify_integrity=False,
        )

        return new_index, indexer

2298 

    def _convert_listlike_indexer(self, keyarr, kind=None):
        """
        Parameters
        ----------
        keyarr : list-like
            Indexer to convert.

        Returns
        -------
        tuple (indexer, keyarr)
            indexer is an ndarray or None if cannot convert
            keyarr are tuple-safe keys
        """
        indexer, keyarr = super()._convert_listlike_indexer(keyarr, kind=kind)

        # are we indexing a specific level
        if indexer is None and len(keyarr) and not isinstance(keyarr[0], tuple):
            # Scalar (non-tuple) keys select from the first level.
            level = 0
            _, indexer = self.reindex(keyarr, level=level)

            # take all
            if indexer is None:
                indexer = np.arange(len(self))

            # Every requested label must exist in the first level.
            check = self.levels[0].get_indexer(keyarr)
            mask = check == -1
            if mask.any():
                raise KeyError(f"{keyarr[mask]} not in index")

        return indexer, keyarr

2329 

    @Appender(_index_shared_docs["get_indexer"] % _index_doc_kwargs)
    def get_indexer(self, target, method=None, limit=None, tolerance=None):
        method = missing.clean_reindex_fill_method(method)
        target = ensure_index(target)

        # empty indexer
        if is_list_like(target) and not len(target):
            return ensure_platform_int(np.array([]))

        if not isinstance(target, MultiIndex):
            try:
                target = MultiIndex.from_tuples(target)
            except (TypeError, ValueError):

                # let's instead try with a straight Index
                if method is None:
                    return Index(self.values).get_indexer(
                        target, method=method, limit=limit, tolerance=tolerance
                    )
                # NOTE(review): with a fill method, a non-convertible target
                # falls through to the engine below unchanged.

        if not self.is_unique:
            raise ValueError("Reindexing only valid with uniquely valued Index objects")

        if method == "pad" or method == "backfill":
            if tolerance is not None:
                raise NotImplementedError(
                    "tolerance not implemented yet for MultiIndex"
                )
            indexer = self._engine.get_indexer(target, method, limit)
        elif method == "nearest":
            raise NotImplementedError(
                "method='nearest' not implemented yet "
                "for MultiIndex; see GitHub issue 9365"
            )
        else:
            # Exact matching; -1 marks entries not found.
            indexer = self._engine.get_indexer(target)

        return ensure_platform_int(indexer)

2368 

    @Appender(_index_shared_docs["get_indexer_non_unique"] % _index_doc_kwargs)
    def get_indexer_non_unique(self, target):
        # No MultiIndex-specific behavior; present only to attach the shared
        # docstring to the base Index implementation.
        return super().get_indexer_non_unique(target)

2372 

    def reindex(self, target, method=None, level=None, limit=None, tolerance=None):
        """
        Create index with target's values (move/add/delete values as necessary)

        Returns
        -------
        new_index : pd.MultiIndex
            Resulting index
        indexer : np.ndarray or None
            Indices of output values in original index.

        """
        # GH6552: preserve names when reindexing to non-named target
        # (i.e. neither Index nor Series).
        preserve_names = not hasattr(target, "names")

        if level is not None:
            if method is not None:
                raise TypeError("Fill method not supported if level passed")

            # GH7774: preserve dtype/tz if target is empty and not an Index.
            # target may be an iterator
            target = ibase._ensure_has_len(target)
            if len(target) == 0 and not isinstance(target, Index):
                # Build an empty index of the level's own type.
                idx = self.levels[level]
                attrs = idx._get_attributes_dict()
                attrs.pop("freq", None)  # don't preserve freq
                target = type(idx)._simple_new(np.empty(0, dtype=idx.dtype), **attrs)
            else:
                target = ensure_index(target)
            # Join on the single requested level.
            target, indexer, _ = self._join_level(
                target, level, how="right", return_indexers=True, keep_order=False
            )
        else:
            target = ensure_index(target)
            if self.equals(target):
                # Identical labeling: no reordering needed.
                indexer = None
            else:
                if self.is_unique:
                    indexer = self.get_indexer(
                        target, method=method, limit=limit, tolerance=tolerance
                    )
                else:
                    raise ValueError("cannot handle a non-unique multi-index!")

        if not isinstance(target, MultiIndex):
            if indexer is None:
                target = self
            elif (indexer >= 0).all():
                # Every target entry was found; take the matching rows.
                target = self.take(indexer)
            else:
                # hopefully?
                target = MultiIndex.from_tuples(target)

        if (
            preserve_names
            and target.nlevels == self.nlevels
            and target.names != self.names
        ):
            target = target.copy(deep=False)
            target.names = self.names

        return target, indexer

2436 

2437 def get_slice_bound( 

2438 self, label: Union[Hashable, Sequence[Hashable]], side: str, kind: str 

2439 ) -> int: 

2440 """ 

2441 For an ordered MultiIndex, compute slice bound 

2442 that corresponds to given label. 

2443 

2444 Returns leftmost (one-past-the-rightmost if `side=='right') position 

2445 of given label. 

2446 

2447 Parameters 

2448 ---------- 

2449 label : object or tuple of objects 

2450 side : {'left', 'right'} 

2451 kind : {'loc', 'getitem'} 

2452 

2453 Returns 

2454 ------- 

2455 int 

2456 Index of label. 

2457 

2458 Notes 

2459 ----- 

2460 This method only works if level 0 index of the MultiIndex is lexsorted. 

2461 

2462 Examples 

2463 -------- 

2464 >>> mi = pd.MultiIndex.from_arrays([list('abbc'), list('gefd')]) 

2465 

2466 Get the locations from the leftmost 'b' in the first level 

2467 until the end of the multiindex: 

2468 

2469 >>> mi.get_slice_bound('b', side="left", kind="loc") 

2470 1 

2471 

2472 Like above, but if you get the locations from the rightmost 

2473 'b' in the first level and 'f' in the second level: 

2474 

2475 >>> mi.get_slice_bound(('b','f'), side="right", kind="loc") 

2476 3 

2477 

2478 See Also 

2479 -------- 

2480 MultiIndex.get_loc : Get location for a label or a tuple of labels. 

2481 MultiIndex.get_locs : Get location for a label/slice/list/mask or a 

2482 sequence of such. 

2483 """ 

2484 

2485 if not isinstance(label, tuple): 

2486 label = (label,) 

2487 return self._partial_tup_index(label, side=side) 

2488 

    def slice_locs(self, start=None, end=None, step=None, kind=None):
        """
        For an ordered MultiIndex, compute the slice locations for input
        labels.

        The input labels can be tuples representing partial levels, e.g. for a
        MultiIndex with 3 levels, you can pass a single value (corresponding to
        the first level), or a 1-, 2-, or 3-tuple.

        Parameters
        ----------
        start : label or tuple, default None
            If None, defaults to the beginning
        end : label or tuple
            If None, defaults to the end
        step : int or None
            Slice step
        kind : string, optional, defaults None

        Returns
        -------
        (start, end) : (int, int)

        Notes
        -----
        This method only works if the MultiIndex is properly lexsorted. So,
        if only the first 2 levels of a 3-level MultiIndex are lexsorted,
        you can only pass two levels to ``.slice_locs``.

        Examples
        --------
        >>> mi = pd.MultiIndex.from_arrays([list('abbd'), list('deff')],
        ...                                names=['A', 'B'])

        Get the slice locations from the beginning of 'b' in the first level
        until the end of the multiindex:

        >>> mi.slice_locs(start='b')
        (1, 4)

        Like above, but stop at the end of 'b' in the first level and 'f' in
        the second level:

        >>> mi.slice_locs(start='b', end=('b', 'f'))
        (1, 3)

        See Also
        --------
        MultiIndex.get_loc : Get location for a label or a tuple of labels.
        MultiIndex.get_locs : Get location for a label/slice/list/mask or a
                              sequence of such.
        """
        # This function adds nothing to its parent implementation (the magic
        # happens in get_slice_bound method), but it adds meaningful doc.
        return super().slice_locs(start, end, step, kind=kind)

2544 

    def _partial_tup_index(self, tup, side="left"):
        # Find the slice bound of a (possibly partial) key tuple by
        # narrowing the [start, end) window one level at a time.
        if len(tup) > self.lexsort_depth:
            raise UnsortedIndexError(
                f"Key length ({len(tup)}) was greater than MultiIndex lexsort depth"
                f" ({self.lexsort_depth})"
            )

        n = len(tup)
        start, end = 0, len(self)
        zipped = zip(tup, self.levels, self.codes)
        for k, (lab, lev, labs) in enumerate(zipped):
            # Codes of this level restricted to the current window.
            section = labs[start:end]

            if lab not in lev and np.ndim(lab) == 0 and not isna(lab):
                # Scalar label absent from the level: find where it would
                # be inserted and stop early.
                if not lev.is_type_compatible(lib.infer_dtype([lab], skipna=False)):
                    raise TypeError(f"Level type mismatch: {lab}")

                # short circuit
                loc = lev.searchsorted(lab, side=side)
                if side == "right" and loc >= 0:
                    # For a right bound, include the run of the preceding code.
                    loc -= 1
                return start + section.searchsorted(loc, side=side)

            idx = self._get_loc_single_level_index(lev, lab)
            if k < n - 1:
                # Narrow the window to the run of rows matching this level.
                end = start + section.searchsorted(idx, side="right")
                start = start + section.searchsorted(idx, side="left")
            else:
                # Last component of the key: return the requested bound.
                return start + section.searchsorted(idx, side=side)

2574 

2575 def _get_loc_single_level_index(self, level_index: Index, key: Hashable) -> int: 

2576 """ 

2577 If key is NA value, location of index unify as -1. 

2578 

2579 Parameters 

2580 ---------- 

2581 level_index: Index 

2582 key : label 

2583 

2584 Returns 

2585 ------- 

2586 loc : int 

2587 If key is NA value, loc is -1 

2588 Else, location of key in index. 

2589 

2590 See Also 

2591 -------- 

2592 Index.get_loc : The get_loc method for (single-level) index. 

2593 """ 

2594 

2595 if is_scalar(key) and isna(key): 

2596 return -1 

2597 else: 

2598 return level_index.get_loc(key) 

2599 

    def get_loc(self, key, method=None):
        """
        Get location for a label or a tuple of labels as an integer, slice or
        boolean mask.

        Parameters
        ----------
        key : label or tuple of labels (one for each level)
        method : None

        Returns
        -------
        loc : int, slice object or boolean mask
            If the key is past the lexsort depth, the return may be a
            boolean mask array, otherwise it is always a slice or int.

        See Also
        --------
        Index.get_loc : The get_loc method for (single-level) index.
        MultiIndex.slice_locs : Get slice location given start label(s) and
                                end label(s).
        MultiIndex.get_locs : Get location for a label/slice/list/mask or a
                              sequence of such.

        Notes
        -----
        The key cannot be a slice, list of same-level labels, a boolean mask,
        or a sequence of such. If you want to use those, use
        :meth:`MultiIndex.get_locs` instead.

        Examples
        --------
        >>> mi = pd.MultiIndex.from_arrays([list('abb'), list('def')])

        >>> mi.get_loc('b')
        slice(1, 3, None)

        >>> mi.get_loc(('b', 'e'))
        1
        """
        if method is not None:
            raise NotImplementedError(
                "only the default get_loc method is "
                "currently supported for MultiIndex"
            )

        def _maybe_to_slice(loc):
            """convert integer indexer to boolean mask or slice if possible"""
            if not isinstance(loc, np.ndarray) or loc.dtype != "int64":
                return loc

            loc = lib.maybe_indices_to_slice(loc, len(self))
            if isinstance(loc, slice):
                return loc

            # Non-contiguous positions: fall back to a boolean mask.
            mask = np.empty(len(self), dtype="bool")
            mask.fill(False)
            mask[loc] = True
            return mask

        if not isinstance(key, (tuple, list)):
            # not including list here breaks some indexing, xref #30892
            loc = self._get_level_indexer(key, level=0)
            return _maybe_to_slice(loc)

        keylen = len(key)
        if self.nlevels < keylen:
            raise KeyError(
                f"Key length ({keylen}) exceeds index depth ({self.nlevels})"
            )

        if keylen == self.nlevels and self.is_unique:
            # Complete key in a unique index: the engine resolves it directly.
            return self._engine.get_loc(key)

        # -- partial selection or non-unique index
        # break the key into 2 parts based on the lexsort_depth of the index;
        # the first part returns a continuous slice of the index; the 2nd part
        # needs linear search within the slice
        i = self.lexsort_depth
        lead_key, follow_key = key[:i], key[i:]
        start, stop = (
            self.slice_locs(lead_key, lead_key) if lead_key else (0, len(self))
        )

        if start == stop:
            raise KeyError(key)

        if not follow_key:
            return slice(start, stop)

        warnings.warn(
            "indexing past lexsort depth may impact performance.",
            PerformanceWarning,
            stacklevel=10,
        )

        loc = np.arange(start, stop, dtype="int64")

        for i, k in enumerate(follow_key, len(lead_key)):
            # Keep only positions whose code at level i matches this key part.
            mask = self.codes[i][loc] == self._get_loc_single_level_index(
                self.levels[i], k
            )
            if not mask.all():
                loc = loc[mask]
            if not len(loc):
                raise KeyError(key)

        # Prefer a slice when the whole window matched.
        return _maybe_to_slice(loc) if len(loc) != stop - start else slice(start, stop)

2708 

    def get_loc_level(self, key, level=0, drop_level: bool = True):
        """
        Get both the location for the requested label(s) and the
        resulting sliced index.

        Parameters
        ----------
        key : label or sequence of labels
        level : int/level name or list thereof, optional
        drop_level : bool, default True
            If ``False``, the resulting index will not drop any level.

        Returns
        -------
        loc : A 2-tuple where the elements are:
              Element 0: int, slice object or boolean array
              Element 1: The resulting sliced multiindex/index. If the key
              contains all levels, this will be ``None``.

        See Also
        --------
        MultiIndex.get_loc : Get location for a label or a tuple of labels.
        MultiIndex.get_locs : Get location for a label/slice/list/mask or a
                              sequence of such.

        Examples
        --------
        >>> mi = pd.MultiIndex.from_arrays([list('abb'), list('def')],
        ...                                names=['A', 'B'])

        >>> mi.get_loc_level('b')
        (slice(1, 3, None), Index(['e', 'f'], dtype='object', name='B'))

        >>> mi.get_loc_level('e', level='B')
        (array([False,  True, False], dtype=bool),
        Index(['b'], dtype='object', name='A'))

        >>> mi.get_loc_level(['b', 'e'])
        (1, None)
        """

        # different name to distinguish from maybe_droplevels
        def maybe_mi_droplevels(indexer, levels, drop_level: bool):
            # Slice self by the indexer and, when requested, drop the levels
            # the key pinned to a single label.
            if not drop_level:
                return self[indexer]
            # kludgearound
            orig_index = new_index = self[indexer]
            levels = [self._get_level_number(i) for i in levels]
            for i in sorted(levels, reverse=True):
                try:
                    new_index = new_index.droplevel(i)
                except ValueError:

                    # no dropping here
                    return orig_index
            return new_index

        if isinstance(level, (tuple, list)):
            if len(key) != len(level):
                raise AssertionError(
                    "Key for location must have same length as number of levels"
                )
            result = None
            for lev, k in zip(level, key):
                loc, new_index = self.get_loc_level(k, level=lev)
                if isinstance(loc, slice):
                    # AND-combining requires a boolean mask, not a slice.
                    mask = np.zeros(len(self), dtype=bool)
                    mask[loc] = True
                    loc = mask

                result = loc if result is None else result & loc

            return result, maybe_mi_droplevels(result, level, drop_level)

        level = self._get_level_number(level)

        # kludge for #1796
        if isinstance(key, list):
            key = tuple(key)

        if isinstance(key, tuple) and level == 0:

            try:
                # The whole tuple may itself be a label of the first level.
                if key in self.levels[0]:
                    indexer = self._get_level_indexer(key, level=level)
                    new_index = maybe_mi_droplevels(indexer, [0], drop_level)
                    return indexer, new_index
            except TypeError:
                pass

            if not any(isinstance(k, slice) for k in key):

                # partial selection
                # optionally get indexer to avoid re-calculation
                def partial_selection(key, indexer=None):
                    if indexer is None:
                        indexer = self.get_loc(key)
                    ilevels = [
                        i for i in range(len(key)) if key[i] != slice(None, None)
                    ]
                    return indexer, maybe_mi_droplevels(indexer, ilevels, drop_level)

                if len(key) == self.nlevels and self.is_unique:
                    # Complete key in unique index -> standard get_loc
                    try:
                        return (self._engine.get_loc(key), None)
                    except KeyError as e:
                        raise KeyError(key) from e
                else:
                    return partial_selection(key)
            else:
                # Mixed labels/slices: combine per-level indexers.
                indexer = None
                for i, k in enumerate(key):
                    if not isinstance(k, slice):
                        k = self._get_level_indexer(k, level=i)
                        if isinstance(k, slice):
                            # everything
                            if k.start == 0 and k.stop == len(self):
                                k = slice(None, None)
                        else:
                            k_index = k

                    if isinstance(k, slice):
                        if k == slice(None, None):
                            continue
                        else:
                            raise TypeError(key)

                    if indexer is None:
                        indexer = k_index
                    else:  # pragma: no cover
                        indexer &= k_index
                if indexer is None:
                    indexer = slice(None, None)
                ilevels = [i for i in range(len(key)) if key[i] != slice(None, None)]
                return indexer, maybe_mi_droplevels(indexer, ilevels, drop_level)
        else:
            # Scalar key on a single (possibly non-zero) level.
            indexer = self._get_level_indexer(key, level=level)
            return indexer, maybe_mi_droplevels(indexer, [level], drop_level)

2848 

    def _get_level_indexer(self, key, level=0, indexer=None):
        # return an indexer, boolean array or a slice showing where the key is
        # in the totality of values
        # if the indexer is provided, then use this

        level_index = self.levels[level]
        level_codes = self.codes[level]

        def convert_indexer(start, stop, step, indexer=indexer, codes=level_codes):
            # given the inputs and the codes/indexer, compute an indexer set
            # if we have a provided indexer, then this need not consider
            # the entire labels set

            r = np.arange(start, stop, step)
            if indexer is not None and len(indexer) != len(codes):

                # we have an indexer which maps the locations in the labels
                # that we have already selected (and is not an indexer for the
                # entire set) otherwise this is wasteful so we only need to
                # examine locations that are in this set the only magic here is
                # that the result are the mappings to the set that we have
                # selected
                from pandas import Series

                mapper = Series(indexer)
                indexer = codes.take(ensure_platform_int(indexer))
                result = Series(Index(indexer).isin(r).nonzero()[0])
                m = result.map(mapper)._ndarray_values

            else:
                # Full-length boolean mask over the codes.
                m = np.zeros(len(codes), dtype=bool)
                m[np.in1d(codes, r, assume_unique=Index(codes).is_unique)] = True

            return m

        if isinstance(key, slice):
            # handle a slice, returning a slice if we can
            # otherwise a boolean indexer

            try:
                if key.start is not None:
                    start = level_index.get_loc(key.start)
                else:
                    start = 0
                if key.stop is not None:
                    stop = level_index.get_loc(key.stop)
                else:
                    stop = len(level_index) - 1
                step = key.step
            except KeyError:

                # we have a partial slice (like looking up a partial date
                # string)
                start = stop = level_index.slice_indexer(
                    key.start, key.stop, key.step, kind="loc"
                )
                step = start.step

            if isinstance(start, slice) or isinstance(stop, slice):
                # we have a slice for start and/or stop
                # a partial date slicer on a DatetimeIndex generates a slice
                # note that the stop ALREADY includes the stopped point (if
                # it was a string sliced)
                start = getattr(start, "start", start)
                stop = getattr(stop, "stop", stop)
                return convert_indexer(start, stop, step)

            elif level > 0 or self.lexsort_depth == 0 or step is not None:
                # need to have like semantics here to right
                # searching as when we are using a slice
                # so include the stop+1 (so we include stop)
                return convert_indexer(start, stop + 1, step)
            else:
                # sorted, so can return slice object -> view
                i = level_codes.searchsorted(start, side="left")
                j = level_codes.searchsorted(stop, side="right")
                return slice(i, j, step)

        else:

            # Scalar key: resolve it to its code within the level (-1 for NA).
            code = self._get_loc_single_level_index(level_index, key)

            if level > 0 or self.lexsort_depth == 0:
                # Desired level is not sorted
                locs = np.array(level_codes == code, dtype=bool, copy=False)
                if not locs.any():
                    # The label is present in self.levels[level] but unused:
                    raise KeyError(key)
                return locs

            # Sorted level: the matching rows form a contiguous run.
            i = level_codes.searchsorted(code, side="left")
            j = level_codes.searchsorted(code, side="right")
            if i == j:
                # The label is present in self.levels[level] but unused:
                raise KeyError(key)
            return slice(i, j)

2945 

    def get_locs(self, seq):
        """
        Get location for a sequence of labels.

        Parameters
        ----------
        seq : label, slice, list, mask or a sequence of such
            You should use one of the above for each level.
            If a level should not be used, set it to ``slice(None)``.

        Returns
        -------
        numpy.ndarray
            NumPy array of integers suitable for passing to iloc.

        See Also
        --------
        MultiIndex.get_loc : Get location for a label or a tuple of labels.
        MultiIndex.slice_locs : Get slice location given start label(s) and
                                end label(s).

        Examples
        --------
        >>> mi = pd.MultiIndex.from_arrays([list('abb'), list('def')])

        >>> mi.get_locs('b')  # doctest: +SKIP
        array([1, 2], dtype=int64)

        >>> mi.get_locs([slice(None), ['e', 'f']])  # doctest: +SKIP
        array([1, 2], dtype=int64)

        >>> mi.get_locs([[True, False, True], slice('e', 'f')])  # doctest: +SKIP
        array([2], dtype=int64)
        """
        from pandas.core.indexes.numeric import Int64Index

        # must be lexsorted to at least as many levels
        true_slices = [i for (i, s) in enumerate(com.is_true_slices(seq)) if s]
        if true_slices and true_slices[-1] >= self.lexsort_depth:
            raise UnsortedIndexError(
                "MultiIndex slicing requires the index to be lexsorted: slicing "
                f"on levels {true_slices}, lexsort depth {self.lexsort_depth}"
            )
        # indexer
        # this is the list of all values that we want to select
        n = len(self)
        indexer = None

        def _convert_to_indexer(r):
            # return an indexer
            if isinstance(r, slice):
                # Expand the slice to explicit integer positions.
                m = np.zeros(n, dtype=bool)
                m[r] = True
                r = m.nonzero()[0]
            elif com.is_bool_indexer(r):
                if len(r) != n:
                    raise ValueError(
                        "cannot index with a boolean indexer "
                        "that is not the same length as the "
                        "index"
                    )
                r = r.nonzero()[0]
            return Int64Index(r)

        def _update_indexer(idxr, indexer=indexer):
            # Intersect the accumulated selection with the next level's
            # matches; None means "everything so far".
            if indexer is None:
                indexer = Index(np.arange(n))
            if idxr is None:
                return indexer
            return indexer & idxr

        for i, k in enumerate(seq):

            if com.is_bool_indexer(k):
                # a boolean indexer, must be the same length!
                k = np.asarray(k)
                indexer = _update_indexer(_convert_to_indexer(k), indexer=indexer)

            elif is_list_like(k):
                # a collection of labels to include from this level (these
                # are or'd)
                indexers = None
                for x in k:
                    try:
                        idxrs = _convert_to_indexer(
                            self._get_level_indexer(x, level=i, indexer=indexer)
                        )
                        indexers = idxrs if indexers is None else indexers | idxrs
                    except KeyError:

                        # ignore not founds
                        continue

                if indexers is not None:
                    indexer = _update_indexer(indexers, indexer=indexer)
                else:
                    # no matches we are done
                    return Int64Index([])._ndarray_values

            elif com.is_null_slice(k):
                # empty slice
                indexer = _update_indexer(None, indexer=indexer)

            elif isinstance(k, slice):

                # a slice, include BOTH of the labels
                indexer = _update_indexer(
                    _convert_to_indexer(
                        self._get_level_indexer(k, level=i, indexer=indexer)
                    ),
                    indexer=indexer,
                )
            else:
                # a single label
                indexer = _update_indexer(
                    _convert_to_indexer(
                        self.get_loc_level(k, level=i, drop_level=False)[0]
                    ),
                    indexer=indexer,
                )

        # empty indexer
        if indexer is None:
            return Int64Index([])._ndarray_values
        return indexer._ndarray_values

3071 

def truncate(self, before=None, after=None):
    """
    Slice the index between two labels / tuples and return a new MultiIndex.

    Parameters
    ----------
    before : label or tuple, can be partial. Default None
        None defaults to start
    after : label or tuple, can be partial. Default None
        None defaults to end

    Returns
    -------
    truncated : MultiIndex
    """
    if after and before and after < before:
        raise ValueError("after < before")

    # Positions inside the first level, and row positions inside the index.
    lev_start, lev_stop = self.levels[0].slice_locs(before, after)
    row_start, row_stop = self.slice_locs(before, after)

    truncated_levels = list(self.levels)
    truncated_levels[0] = truncated_levels[0][lev_start:lev_stop]

    truncated_codes = [codes[row_start:row_stop] for codes in self.codes]
    # Level 0 was trimmed at the front, so its codes must be re-based.
    truncated_codes[0] = truncated_codes[0] - lev_start

    return MultiIndex(
        levels=truncated_levels, codes=truncated_codes, verify_integrity=False
    )

3100 

def equals(self, other) -> bool:
    """
    Determines if two MultiIndex objects have the same labeling information
    (the levels themselves do not necessarily have to be the same)

    See Also
    --------
    equal_levels
    """
    # Fast path: literally the same object.
    if self.is_(other):
        return True

    if not isinstance(other, Index):
        return False

    if not isinstance(other, MultiIndex):
        # d-level MultiIndex can equal d-tuple Index
        if not is_object_dtype(other.dtype):
            if self.nlevels != other.nlevels:
                return False

        # Compare as flat arrays of tuples.
        other_vals = com.values_from_object(ensure_index(other))
        return array_equivalent(self._ndarray_values, other_vals)

    if self.nlevels != other.nlevels:
        return False

    if len(self) != len(other):
        return False

    # Compare level by level: codes of -1 mark missing entries and are
    # dropped on each side before materializing the referenced level values.
    for i in range(self.nlevels):
        self_codes = self.codes[i]
        self_codes = self_codes[self_codes != -1]
        self_values = algos.take_nd(
            np.asarray(self.levels[i]._values), self_codes, allow_fill=False
        )

        other_codes = other.codes[i]
        other_codes = other_codes[other_codes != -1]
        other_values = algos.take_nd(
            np.asarray(other.levels[i]._values), other_codes, allow_fill=False
        )

        # since we use NaT both datetime64 and timedelta64
        # we can have a situation where a level is typed say
        # timedelta64 in self (IOW it has other values than NaT)
        # but types datetime64 in other (where its all NaT)
        # but these are equivalent
        if len(self_values) == 0 and len(other_values) == 0:
            continue

        if not array_equivalent(self_values, other_values):
            return False

    return True

3156 

def equal_levels(self, other):
    """
    Return True if the levels of both MultiIndex objects are the same

    """
    if self.nlevels != other.nlevels:
        return False

    # Every corresponding pair of levels must compare equal.
    return all(
        self.levels[i].equals(other.levels[i]) for i in range(self.nlevels)
    )

3169 

def union(self, other, sort=None):
    """
    Form the union of two MultiIndex objects

    Parameters
    ----------
    other : MultiIndex or array / Index of tuples
    sort : False or None, default None
        Whether to sort the resulting Index.

        * None : Sort the result, except when

          1. `self` and `other` are equal.
          2. `self` has length 0.
          3. Some values in `self` or `other` cannot be compared.
             A RuntimeWarning is issued in this case.

        * False : do not sort the result.

        .. versionadded:: 0.24.0

        .. versionchanged:: 0.24.1

           Changed the default value from ``True`` to ``None``
           (without change in behaviour).

    Returns
    -------
    Index

    >>> index.union(index2)
    """
    self._validate_sort_keyword(sort)
    self._assert_can_do_setop(other)
    other, result_names = self._convert_can_do_setop(other)

    # Nothing to merge in, or identical labeling: reuse self unchanged.
    if len(other) == 0 or self.equals(other):
        return self

    # TODO: Index.union returns other when `len(self)` is 0.

    merged = lib.fast_unique_multiple(
        [self._ndarray_values, other._ndarray_values], sort=sort
    )

    # Pivot the unique tuples back into one array per level.
    arrays = zip(*merged)
    return MultiIndex.from_arrays(arrays, sortorder=0, names=result_names)

3218 

def intersection(self, other, sort=False):
    """
    Form the intersection of two MultiIndex objects.

    Parameters
    ----------
    other : MultiIndex or array / Index of tuples
    sort : False or None, default False
        Sort the resulting MultiIndex if possible

        .. versionadded:: 0.24.0

        .. versionchanged:: 0.24.1

           Changed the default from ``True`` to ``False``, to match
           behaviour from before 0.24.0

    Returns
    -------
    Index
    """
    self._validate_sort_keyword(sort)
    self._assert_can_do_setop(other)
    other, result_names = self._convert_can_do_setop(other)

    if self.equals(other):
        return self

    # Plain set intersection over the materialized tuples.
    common = set(self._ndarray_values) & set(other._ndarray_values)

    if sort is None:
        common = sorted(common)

    if not common:
        # Keep self.levels on the empty result instead of empty levels.
        return MultiIndex(
            levels=self.levels,
            codes=[[]] * self.nlevels,
            names=result_names,
            verify_integrity=False,
        )
    return MultiIndex.from_arrays(zip(*common), sortorder=0, names=result_names)

3265 

def difference(self, other, sort=None):
    """
    Compute set difference of two MultiIndex objects

    Parameters
    ----------
    other : MultiIndex
    sort : False or None, default None
        Sort the resulting MultiIndex if possible

        .. versionadded:: 0.24.0

        .. versionchanged:: 0.24.1

           Changed the default value from ``True`` to ``None``
           (without change in behaviour).

    Returns
    -------
    diff : MultiIndex
    """
    self._validate_sort_keyword(sort)
    self._assert_can_do_setop(other)
    other, result_names = self._convert_can_do_setop(other)

    if len(other) == 0:
        return self

    if self.equals(other):
        # Everything is subtracted: empty result, levels carried over.
        return MultiIndex(
            levels=self.levels,
            codes=[[]] * self.nlevels,
            names=result_names,
            verify_integrity=False,
        )

    this = self._get_unique_index()

    # Positions in `this` that also occur in `other`...
    indexer = this.get_indexer(other)
    indexer = indexer.take((indexer != -1).nonzero()[0])

    # ...and their complement: the labels unique to `this`.
    keep = np.setdiff1d(np.arange(this.size), indexer, assume_unique=True)
    remaining = this.values.take(keep)
    if sort is None:
        remaining = sorted(remaining)

    if len(remaining) == 0:
        return MultiIndex(
            levels=[[]] * self.nlevels,
            codes=[[]] * self.nlevels,
            names=result_names,
            verify_integrity=False,
        )
    return MultiIndex.from_tuples(remaining, sortorder=0, names=result_names)

3321 

3322 @Appender(_index_shared_docs["astype"]) 

3323 def astype(self, dtype, copy=True): 

3324 dtype = pandas_dtype(dtype) 

3325 if is_categorical_dtype(dtype): 

3326 msg = "> 1 ndim Categorical are not supported at this time" 

3327 raise NotImplementedError(msg) 

3328 elif not is_object_dtype(dtype): 

3329 raise TypeError( 

3330 f"Setting {type(self)} dtype to anything other " 

3331 "than object is not supported" 

3332 ) 

3333 elif copy is True: 

3334 return self._shallow_copy() 

3335 return self 

3336 

3337 def _convert_can_do_setop(self, other): 

3338 result_names = self.names 

3339 

3340 if not hasattr(other, "names"): 

3341 if len(other) == 0: 

3342 other = MultiIndex( 

3343 levels=[[]] * self.nlevels, 

3344 codes=[[]] * self.nlevels, 

3345 verify_integrity=False, 

3346 ) 

3347 else: 

3348 msg = "other must be a MultiIndex or a list of tuples" 

3349 try: 

3350 other = MultiIndex.from_tuples(other) 

3351 except TypeError: 

3352 raise TypeError(msg) 

3353 else: 

3354 result_names = self.names if self.names == other.names else None 

3355 return other, result_names 

3356 

def insert(self, loc, item):
    """
    Make new MultiIndex inserting new item at location

    Parameters
    ----------
    loc : int
    item : tuple
        Must be same length as number of levels in the MultiIndex

    Returns
    -------
    new_index : Index
    """
    if not isinstance(item, tuple):
        # Scalar key: pad the unspecified lower levels with empty strings.
        item = (item,) + ("",) * (self.nlevels - 1)
    elif len(item) != self.nlevels:
        raise ValueError("Item must have length equal to number of levels.")

    new_levels = []
    new_codes = []
    for value, level, level_codes in zip(item, self.levels, self.codes):
        if value in level:
            code = level.get_loc(value)
        else:
            # Unseen label: append at the end of the level so the
            # pre-existing codes remain valid without recomputation.
            code = len(level)
            level = level.insert(code, value)

        new_levels.append(level)
        new_codes.append(np.insert(ensure_int64(level_codes), loc, code))

    return MultiIndex(
        levels=new_levels, codes=new_codes, names=self.names, verify_integrity=False
    )

3396 

def delete(self, loc):
    """
    Make new index with passed location deleted

    Returns
    -------
    new_index : MultiIndex
    """
    # Only the codes shrink; levels and names carry over untouched.
    trimmed_codes = [np.delete(level_codes, loc) for level_codes in self.codes]
    return MultiIndex(
        levels=self.levels,
        codes=trimmed_codes,
        names=self.names,
        verify_integrity=False,
    )

3412 

3413 def _wrap_joined_index(self, joined, other): 

3414 names = self.names if self.names == other.names else None 

3415 return MultiIndex.from_tuples(joined, names=names) 

3416 

3417 @Appender(Index.isin.__doc__) 

3418 def isin(self, values, level=None): 

3419 if level is None: 

3420 values = MultiIndex.from_tuples(values, names=self.names).values 

3421 return algos.isin(self.values, values) 

3422 else: 

3423 num = self._get_level_number(level) 

3424 levs = self.get_level_values(num) 

3425 

3426 if levs.size == 0: 

3427 return np.zeros(len(levs), dtype=np.bool_) 

3428 return levs.isin(values) 

3429 

3430 

# Opt MultiIndex out of the numeric (including add/sub) and logical method
# sets that the Index base-class machinery would otherwise attach.
MultiIndex._add_numeric_methods_disabled()
MultiIndex._add_numeric_methods_add_sub_disabled()
MultiIndex._add_logical_methods_disabled()

3434 

3435 

3436def _sparsify(label_list, start: int = 0, sentinel=""): 

3437 pivoted = list(zip(*label_list)) 

3438 k = len(label_list) 

3439 

3440 result = pivoted[: start + 1] 

3441 prev = pivoted[start] 

3442 

3443 for cur in pivoted[start + 1 :]: 

3444 sparse_cur = [] 

3445 

3446 for i, (p, t) in enumerate(zip(prev, cur)): 

3447 if i == k - 1: 

3448 sparse_cur.append(t) 

3449 result.append(sparse_cur) 

3450 break 

3451 

3452 if p == t: 

3453 sparse_cur.append(sentinel) 

3454 else: 

3455 sparse_cur.extend(cur[i:]) 

3456 result.append(sparse_cur) 

3457 break 

3458 

3459 prev = cur 

3460 

3461 return list(zip(*result)) 

3462 

3463 

3464def _get_na_rep(dtype) -> str: 

3465 return {np.datetime64: "NaT", np.timedelta64: "NaT"}.get(dtype, "NaN") 

3466 

3467 

def maybe_droplevels(index, key):
    """
    Attempt to drop level or levels from the given index.

    Parameters
    ----------
    index: Index
    key : scalar or tuple

    Returns
    -------
    Index
    """
    # A scalar key consumes a single level; failure leaves index unchanged.
    if not isinstance(key, tuple):
        try:
            return index.droplevel(0)
        except ValueError:
            return index

    # A tuple key consumes one level per element; if we run out of levels,
    # back out entirely and return the untouched original.
    original_index = index
    for _ in key:
        try:
            index = index.droplevel(0)
        except ValueError:
            return original_index
    return index

3497 

3498 

3499def _coerce_indexer_frozen(array_like, categories, copy: bool = False) -> np.ndarray: 

3500 """ 

3501 Coerce the array_like indexer to the smallest integer dtype that can encode all 

3502 of the given categories. 

3503 

3504 Parameters 

3505 ---------- 

3506 array_like : array-like 

3507 categories : array-like 

3508 copy : bool 

3509 

3510 Returns 

3511 ------- 

3512 np.ndarray 

3513 Non-writeable. 

3514 """ 

3515 array_like = coerce_indexer_dtype(array_like, categories) 

3516 if copy: 

3517 array_like = array_like.copy() 

3518 array_like.flags.writeable = False 

3519 return array_like