Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1""" routings for casting """ 

2 

3from datetime import datetime, timedelta 

4 

5import numpy as np 

6 

7from pandas._libs import lib, tslib, tslibs 

8from pandas._libs.tslibs import NaT, OutOfBoundsDatetime, Period, iNaT 

9from pandas._libs.tslibs.timezones import tz_compare 

10from pandas._typing import Dtype 

11from pandas.util._validators import validate_bool_kwarg 

12 

13from pandas.core.dtypes.common import ( 

14 _INT64_DTYPE, 

15 _NS_DTYPE, 

16 _POSSIBLY_CAST_DTYPES, 

17 _TD_DTYPE, 

18 ensure_int8, 

19 ensure_int16, 

20 ensure_int32, 

21 ensure_int64, 

22 ensure_object, 

23 ensure_str, 

24 is_bool, 

25 is_bool_dtype, 

26 is_complex, 

27 is_complex_dtype, 

28 is_datetime64_dtype, 

29 is_datetime64_ns_dtype, 

30 is_datetime64tz_dtype, 

31 is_datetime_or_timedelta_dtype, 

32 is_dtype_equal, 

33 is_extension_array_dtype, 

34 is_float, 

35 is_float_dtype, 

36 is_integer, 

37 is_integer_dtype, 

38 is_numeric_dtype, 

39 is_object_dtype, 

40 is_scalar, 

41 is_string_dtype, 

42 is_timedelta64_dtype, 

43 is_timedelta64_ns_dtype, 

44 is_unsigned_integer_dtype, 

45 pandas_dtype, 

46) 

47from pandas.core.dtypes.dtypes import ( 

48 DatetimeTZDtype, 

49 ExtensionDtype, 

50 IntervalDtype, 

51 PeriodDtype, 

52) 

53from pandas.core.dtypes.generic import ( 

54 ABCDataFrame, 

55 ABCDatetimeArray, 

56 ABCDatetimeIndex, 

57 ABCPeriodArray, 

58 ABCPeriodIndex, 

59 ABCSeries, 

60) 

61from pandas.core.dtypes.inference import is_list_like 

62from pandas.core.dtypes.missing import isna, notna 

63 

64_int8_max = np.iinfo(np.int8).max 

65_int16_max = np.iinfo(np.int16).max 

66_int32_max = np.iinfo(np.int32).max 

67_int64_max = np.iinfo(np.int64).max 

68 

69 

def maybe_convert_platform(values):
    """Try platform-dependent conversion; accepts an ndarray or a list-like."""
    if isinstance(values, (list, tuple, range)):
        # materialize plain Python sequences as a 1d object ndarray first
        values = construct_1d_object_array_from_listlike(values)

    if getattr(values, "dtype", None) == np.object_:
        # unwrap pandas containers to their underlying ndarray if present
        values = getattr(values, "_values", values)
        # soft-convert object elements to a better dtype where possible
        values = lib.maybe_convert_objects(values)

    return values

81 

82 

def is_nested_object(obj) -> bool:
    """
    Return True if *obj* is an object-dtype Series that itself contains
    one or more Series elements.

    This may not necessarily be performant.
    """
    if not (isinstance(obj, ABCSeries) and is_object_dtype(obj)):
        return False
    return any(isinstance(elem, ABCSeries) for elem in obj.values)

98 

99 

def maybe_downcast_to_dtype(result, dtype):
    """
    Try to cast ``result`` to the specified ``dtype`` (e.g. convert back to
    bool/int, or an astype of float64->float32).

    Parameters
    ----------
    result : array-like or scalar
    dtype : np.dtype, ExtensionDtype, or str
        May be the string "infer", in which case a concrete target dtype is
        chosen from ``lib.infer_dtype`` over the values.

    Returns
    -------
    The (possibly) downcast result; returned unchanged when no safe
    conversion applies.
    """
    do_round = False

    # scalars and DataFrames pass through unchanged
    if is_scalar(result):
        return result
    elif isinstance(result, ABCDataFrame):
        # occurs in pivot_table doctest
        return result

    if isinstance(dtype, str):
        if dtype == "infer":
            # map the inferred kind of the values to a concrete dtype string
            inferred_type = lib.infer_dtype(ensure_object(result.ravel()), skipna=False)
            if inferred_type == "boolean":
                dtype = "bool"
            elif inferred_type == "integer":
                dtype = "int64"
            elif inferred_type == "datetime64":
                dtype = "datetime64[ns]"
            elif inferred_type == "timedelta64":
                dtype = "timedelta64[ns]"

            # try to upcast here
            elif inferred_type == "floating":
                dtype = "int64"
                if issubclass(result.dtype.type, np.number):
                    # float -> int needs rounding for the equality check below
                    do_round = True

            else:
                dtype = "object"

        dtype = np.dtype(dtype)

    # numeric downcasts are delegated; identity return means "no conversion"
    converted = maybe_downcast_numeric(result, dtype, do_round)
    if converted is not result:
        return converted

    # a datetimelike
    # GH12821, iNaT is casted to float
    if dtype.kind in ["M", "m"] and result.dtype.kind in ["i", "f"]:
        if hasattr(dtype, "tz"):
            # not a numpy dtype
            if dtype.tz:
                # convert to datetime and change timezone
                from pandas import to_datetime

                result = to_datetime(result).tz_localize("utc")
                result = result.tz_convert(dtype.tz)
        else:
            result = result.astype(dtype)

    elif dtype.type is Period:
        # TODO(DatetimeArray): merge with previous elif
        from pandas.core.arrays import PeriodArray

        try:
            return PeriodArray(result, freq=dtype.freq)
        except TypeError:
            # e.g. TypeError: int() argument must be a string, a
            # bytes-like object or a number, not 'Period
            pass

    return result

165 

166 

def maybe_downcast_numeric(result, dtype, do_round: bool = False):
    """
    Subset of maybe_downcast_to_dtype restricted to numeric dtypes.

    Parameters
    ----------
    result : ndarray or ExtensionArray
    dtype : np.dtype or ExtensionDtype
    do_round : bool
        If True, round values before the lossless-cast comparison
        (used for float -> int downcasts).

    Returns
    -------
    ndarray or ExtensionArray
        The downcast array, or ``result`` unchanged if the cast would not
        round-trip losslessly.
    """
    if not isinstance(dtype, np.dtype):
        # e.g. SparseDtype has no itemsize attr
        return result

    if isinstance(result, list):
        # reached via groupoby.agg _ohlc; really this should be handled
        # earlier
        result = np.array(result)

    def trans(x):
        # optionally round before comparing, so e.g. 1.0 -> 1 is accepted
        if do_round:
            return x.round()
        return x

    if dtype.kind == result.dtype.kind:
        # don't allow upcasts here (except if empty)
        if result.dtype.itemsize <= dtype.itemsize and result.size:
            return result

    if is_bool_dtype(dtype) or is_integer_dtype(dtype):

        if not result.size:
            # if we don't have any elements, just astype it
            return trans(result).astype(dtype)

        # do a test on the first element, if it fails then we are done
        r = result.ravel()
        arr = np.array([r[0]])

        if isna(arr).any():
            # if we have any nulls, then we are done
            return result

        # BUG FIX: np.bool was a deprecated alias of the builtin bool
        # (removed in NumPy 1.24); np.bool_ is the actual numpy scalar type.
        elif not isinstance(r[0], (np.integer, np.floating, np.bool_, int, float, bool)):
            # a comparable, e.g. a Decimal may slip in here
            return result

        if (
            issubclass(result.dtype.type, (np.object_, np.number))
            and notna(result).all()
        ):
            new_result = trans(result).astype(dtype)
            if new_result.dtype.kind == "O" or result.dtype.kind == "O":
                # np.allclose may raise TypeError on object-dtype
                if (new_result == result).all():
                    return new_result
            else:
                if np.allclose(new_result, result, rtol=0):
                    return new_result

    elif (
        issubclass(dtype.type, np.floating)
        and not is_bool_dtype(result.dtype)
        and not is_string_dtype(result.dtype)
    ):
        # float -> float downcasts (e.g. float64 -> float32) are done directly
        return result.astype(dtype)

    return result

239 

240 

def maybe_upcast_putmask(result: np.ndarray, mask: np.ndarray, other):
    """
    A safe version of putmask that potentially upcasts the result.

    The result is replaced with the first N elements of other,
    where N is the number of True values in mask.
    If the length of other is shorter than N, other will be repeated.

    Parameters
    ----------
    result : ndarray
        The destination array. This will be mutated in-place if no upcasting is
        necessary.
    mask : boolean ndarray
    other : scalar
        The source value.

    Returns
    -------
    result : ndarray
    changed : bool
        Set to true if the result array was upcasted.

    Examples
    --------
    >>> result, _ = maybe_upcast_putmask(np.arange(1,6),
    np.array([False, True, False, True, True]), np.arange(21,23))
    >>> result
    array([1, 21, 3, 22, 21])
    """

    if not isinstance(result, np.ndarray):
        raise ValueError("The result input must be a ndarray.")
    if not is_scalar(other):
        # We _could_ support non-scalar other, but until we have a compelling
        # use case, we assume away the possibility.
        raise ValueError("other must be a scalar")

    if mask.any():
        # Two conversions for date-like dtypes that can't be done automatically
        # in np.place:
        # NaN -> NaT
        # integer or integer array -> date-like array
        if result.dtype.kind in ["m", "M"]:
            if is_scalar(other):
                if isna(other):
                    # NaN has no datetime64/timedelta64 representation; use NaT
                    other = result.dtype.type("nat")
                elif is_integer(other):
                    other = np.array(other, dtype=result.dtype)
            elif is_integer_dtype(other):
                other = np.array(other, dtype=result.dtype)

        def changeit():
            # Fallback path: upcast `result` so it can hold `other`.

            # try to directly set by expanding our array to full
            # length of the boolean
            try:
                om = other[mask]
            except (IndexError, TypeError):
                # IndexError occurs in test_upcast when we have a boolean
                # mask of the wrong shape
                # TypeError occurs in test_upcast when `other` is a bool
                pass
            else:
                om_at = om.astype(result.dtype)
                if (om == om_at).all():
                    # NOTE(review): `result` is validated above to be an
                    # ndarray, which has no `.values` attribute, and scalar
                    # `other` makes `other[mask]` raise first -- this branch
                    # looks unreachable/broken; verify before relying on it.
                    new_result = result.values.copy()
                    new_result[mask] = om_at
                    result[:] = new_result
                    return result, False

            # we are forced to change the dtype of the result as the input
            # isn't compatible
            r, _ = maybe_upcast(result, fill_value=other, copy=True)
            np.place(r, mask, other)

            return r, True

        # we want to decide whether place will work
        # if we have nans in the False portion of our mask then we need to
        # upcast (possibly), otherwise we DON't want to upcast (e.g. if we
        # have values, say integers, in the success portion then it's ok to not
        # upcast)
        new_dtype, _ = maybe_promote(result.dtype, other)
        if new_dtype != result.dtype:

            # we have a scalar or len 0 ndarray
            # and its nan and we are changing some values
            if is_scalar(other) or (isinstance(other, np.ndarray) and other.ndim < 1):
                if isna(other):
                    return changeit()

            # we have an ndarray and the masking has nans in it
            else:

                if isna(other).any():
                    return changeit()

        try:
            np.place(result, mask, other)
        except TypeError:
            # e.g. int-dtype result and float-dtype other
            return changeit()

    return result, False

345 

346 

def maybe_promote(dtype, fill_value=np.nan):
    """
    Find the minimal dtype that can hold both the given dtype and fill_value.

    Parameters
    ----------
    dtype : np.dtype or ExtensionDtype
    fill_value : scalar, default np.nan

    Returns
    -------
    dtype
        Upcasted from dtype argument if necessary.
    fill_value
        Upcasted from fill_value argument if necessary.
    """
    if not is_scalar(fill_value) and not is_object_dtype(dtype):
        # with object dtype there is nothing to promote, and the user can
        # pass pretty much any weird fill_value they like
        raise ValueError("fill_value must be a scalar")

    # if we passed an array here, determine the fill value by dtype
    if isinstance(fill_value, np.ndarray):
        if issubclass(fill_value.dtype.type, (np.datetime64, np.timedelta64)):
            fill_value = fill_value.dtype.type("NaT", "ns")
        else:

            # we need to change to object type as our
            # fill_value is of object type
            if fill_value.dtype == np.object_:
                dtype = np.dtype(np.object_)
            fill_value = np.nan

    if dtype == np.object_ or dtype.kind in ["U", "S"]:
        # We treat string-like dtypes as object, and _always_ fill
        # with np.nan
        fill_value = np.nan
        dtype = np.dtype(np.object_)

    # returns tuple of (dtype, fill_value)
    if issubclass(dtype.type, np.datetime64):
        if isinstance(fill_value, datetime) and fill_value.tzinfo is not None:
            # Trying to insert tzaware into tznaive, have to cast to object
            dtype = np.dtype(np.object_)
        elif is_integer(fill_value) or (is_float(fill_value) and not isna(fill_value)):
            # numeric fill values cannot be represented in datetime64
            dtype = np.dtype(np.object_)
        else:
            try:
                fill_value = tslibs.Timestamp(fill_value).to_datetime64()
            except (TypeError, ValueError):
                dtype = np.dtype(np.object_)
    elif issubclass(dtype.type, np.timedelta64):
        if (
            is_integer(fill_value)
            or (is_float(fill_value) and not np.isnan(fill_value))
            or isinstance(fill_value, str)
        ):
            # TODO: What about str that can be a timedelta?
            dtype = np.dtype(np.object_)
        else:
            try:
                fv = tslibs.Timedelta(fill_value)
            except ValueError:
                dtype = np.dtype(np.object_)
            else:
                if fv is NaT:
                    # NaT has no `to_timedelta64` method
                    fill_value = np.timedelta64("NaT", "ns")
                else:
                    fill_value = fv.to_timedelta64()
    elif is_datetime64tz_dtype(dtype):
        if isna(fill_value):
            fill_value = NaT
        elif not isinstance(fill_value, datetime):
            dtype = np.dtype(np.object_)
        elif fill_value.tzinfo is None:
            dtype = np.dtype(np.object_)
        elif not tz_compare(fill_value.tzinfo, dtype.tz):
            # TODO: sure we want to cast here?
            dtype = np.dtype(np.object_)

    elif is_extension_array_dtype(dtype) and isna(fill_value):
        # extension dtypes carry their own NA sentinel
        fill_value = dtype.na_value

    elif is_float(fill_value):
        if issubclass(dtype.type, np.bool_):
            dtype = np.dtype(np.object_)

        elif issubclass(dtype.type, np.integer):
            # a float fill cannot be held losslessly in an integer dtype
            dtype = np.dtype(np.float64)

        elif dtype.kind == "f":
            mst = np.min_scalar_type(fill_value)
            if mst > dtype:
                # e.g. mst is np.float64 and dtype is np.float32
                dtype = mst

        elif dtype.kind == "c":
            mst = np.min_scalar_type(fill_value)
            dtype = np.promote_types(dtype, mst)

    elif is_bool(fill_value):
        if not issubclass(dtype.type, np.bool_):
            dtype = np.dtype(np.object_)

    elif is_integer(fill_value):
        if issubclass(dtype.type, np.bool_):
            dtype = np.dtype(np.object_)

        elif issubclass(dtype.type, np.integer):
            if not np.can_cast(fill_value, dtype):
                # upcast to prevent overflow
                mst = np.min_scalar_type(fill_value)
                dtype = np.promote_types(dtype, mst)
                if dtype.kind == "f":
                    # Case where we disagree with numpy
                    dtype = np.dtype(np.object_)

    elif is_complex(fill_value):
        if issubclass(dtype.type, np.bool_):
            dtype = np.dtype(np.object_)

        elif issubclass(dtype.type, (np.integer, np.floating)):
            mst = np.min_scalar_type(fill_value)
            dtype = np.promote_types(dtype, mst)

        elif dtype.kind == "c":
            mst = np.min_scalar_type(fill_value)
            if mst > dtype:
                # e.g. mst is np.complex128 and dtype is np.complex64
                dtype = mst

    elif fill_value is None:
        # None maps to the dtype-appropriate NA value
        if is_float_dtype(dtype) or is_complex_dtype(dtype):
            fill_value = np.nan
        elif is_integer_dtype(dtype):
            dtype = np.float64
            fill_value = np.nan
        elif is_datetime_or_timedelta_dtype(dtype):
            fill_value = dtype.type("NaT", "ns")
        else:
            dtype = np.dtype(np.object_)
            fill_value = np.nan
    else:
        dtype = np.dtype(np.object_)

    # in case we have a string that looked like a number
    if is_extension_array_dtype(dtype):
        pass
    elif issubclass(np.dtype(dtype).type, (bytes, str)):
        dtype = np.dtype(np.object_)

    fill_value = _ensure_dtype_type(fill_value, dtype)
    return dtype, fill_value

501 

502 

503def _ensure_dtype_type(value, dtype): 

504 """ 

505 Ensure that the given value is an instance of the given dtype. 

506 

507 e.g. if out dtype is np.complex64, we should have an instance of that 

508 as opposed to a python complex object. 

509 

510 Parameters 

511 ---------- 

512 value : object 

513 dtype : np.dtype or ExtensionDtype 

514 

515 Returns 

516 ------- 

517 object 

518 """ 

519 

520 # Start with exceptions in which we do _not_ cast to numpy types 

521 if is_extension_array_dtype(dtype): 

522 return value 

523 elif dtype == np.object_: 

524 return value 

525 elif isna(value): 

526 # e.g. keep np.nan rather than try to cast to np.float32(np.nan) 

527 return value 

528 

529 return dtype.type(value) 

530 

531 

def infer_dtype_from(val, pandas_dtype: bool = False):
    """
    Interpret the dtype from a scalar or array.

    Parameters
    ----------
    val : object
    pandas_dtype : bool, default False
        whether to infer dtype including pandas extension types.
        If False, scalar/array belongs to pandas extension types is inferred as
        object
    """
    # dispatch on scalar-ness; both targets share the same signature
    infer = infer_dtype_from_scalar if is_scalar(val) else infer_dtype_from_array
    return infer(val, pandas_dtype=pandas_dtype)

547 

548 

def infer_dtype_from_scalar(val, pandas_dtype: bool = False):
    """
    Interpret the dtype from a scalar.

    Parameters
    ----------
    val : object
        The scalar to infer a dtype for.
    pandas_dtype : bool, default False
        whether to infer dtype including pandas extension types.
        If False, scalar belongs to pandas extension types is inferred as
        object

    Returns
    -------
    tuple of (dtype, value)
        The inferred dtype together with the (possibly converted) value.
    """

    # default: anything not classified below stays object
    dtype = np.dtype(np.object_)

    # a 1-element ndarray
    if isinstance(val, np.ndarray):
        msg = "invalid ndarray passed to infer_dtype_from_scalar"
        if val.ndim != 0:
            raise ValueError(msg)

        dtype = val.dtype
        val = val.item()

    elif isinstance(val, str):

        # If we create an empty array using a string to infer
        # the dtype, NumPy will only allocate one character per entry
        # so this is kind of bad. Alternately we could use np.repeat
        # instead of np.empty (but then you still don't want things
        # coming out as np.str_!

        dtype = np.dtype(np.object_)

    elif isinstance(val, (np.datetime64, datetime)):
        val = tslibs.Timestamp(val)
        if val is tslibs.NaT or val.tz is None:
            dtype = np.dtype("M8[ns]")
        else:
            if pandas_dtype:
                dtype = DatetimeTZDtype(unit="ns", tz=val.tz)
            else:
                # return datetimetz as object
                return np.dtype(np.object_), val
        # store the underlying int64 nanosecond value
        val = val.value

    elif isinstance(val, (np.timedelta64, timedelta)):
        val = tslibs.Timedelta(val).value
        dtype = np.dtype("m8[ns]")

    elif is_bool(val):
        dtype = np.dtype(np.bool_)

    elif is_integer(val):
        if isinstance(val, np.integer):
            # preserve the exact width of numpy integer scalars
            dtype = np.dtype(type(val))
        else:
            dtype = np.dtype(np.int64)

    elif is_float(val):
        if isinstance(val, np.floating):
            # preserve the exact width of numpy float scalars
            dtype = np.dtype(type(val))
        else:
            dtype = np.dtype(np.float64)

    elif is_complex(val):
        dtype = np.dtype(np.complex_)

    elif pandas_dtype:
        # extension-type scalars are only recognized when requested
        if lib.is_period(val):
            dtype = PeriodDtype(freq=val.freq)
            val = val.ordinal
        elif lib.is_interval(val):
            subtype = infer_dtype_from_scalar(val.left, pandas_dtype=True)[0]
            dtype = IntervalDtype(subtype=subtype)

    return dtype, val

625 

626 

def infer_dtype_from_array(arr, pandas_dtype: bool = False):
    """
    Infer the dtype from an array.

    Parameters
    ----------
    arr : array
    pandas_dtype : bool, default False
        whether to infer dtype including pandas extension types.
        If False, array belongs to pandas extension types
        is inferred as object

    Returns
    -------
    tuple (numpy-compat/pandas-compat dtype, array)

    Notes
    -----
    if pandas_dtype=False. these infer to numpy dtypes
    exactly with the exception that mixed / object dtypes
    are not coerced by stringifying or conversion

    if pandas_dtype=True. datetime64tz-aware/categorical
    types will retain there character.

    Examples
    --------
    >>> np.asarray([1, '1'])
    array(['1', '1'], dtype='<U21')

    >>> infer_dtype_from_array([1, '1'])
    (numpy.object_, [1, '1'])
    """
    # ndarrays already carry their dtype
    if isinstance(arr, np.ndarray):
        return arr.dtype, arr

    if not is_list_like(arr):
        arr = [arr]

    if pandas_dtype and is_extension_array_dtype(arr):
        return arr.dtype, arr
    elif isinstance(arr, ABCSeries):
        return arr.dtype, np.asarray(arr)

    # don't force numpy coercion when NaNs may be present
    kind = lib.infer_dtype(arr, skipna=False)
    if kind in ("string", "bytes", "unicode", "mixed", "mixed-integer"):
        return (np.object_, arr)

    converted = np.asarray(arr)
    return converted.dtype, converted

680 

681 

def maybe_infer_dtype_type(element):
    """
    Try to infer an object's dtype, for use in arithmetic ops.

    Uses `element.dtype` if that's available.
    Objects implementing the iterator protocol are cast to a NumPy array,
    and from there the array's type is used.

    Parameters
    ----------
    element : object
        Possibly has a `.dtype` attribute, and possibly the iterator
        protocol.

    Returns
    -------
    tipo : type

    Examples
    --------
    >>> from collections import namedtuple
    >>> Foo = namedtuple("Foo", "dtype")
    >>> maybe_infer_dtype_type(Foo(np.dtype("i8")))
    numpy.int64
    """
    if hasattr(element, "dtype"):
        return element.dtype
    if is_list_like(element):
        return np.asarray(element).dtype
    # nothing to infer
    return None

714 

715 

def maybe_upcast(values, fill_value=np.nan, dtype=None, copy: bool = False):
    """
    Provide explicit type promotion and coercion.

    Parameters
    ----------
    values : ndarray or ExtensionArray
        The array that we want to maybe upcast.
    fill_value : what we want to fill with
    dtype : if None, then use the dtype of the values, else coerce to this type
    copy : bool, default True
        If True always make a copy even if no upcast is required.
    """
    if not is_scalar(fill_value) and not is_object_dtype(values.dtype):
        # arbitrary fill values are only permitted for object dtype
        raise ValueError("fill_value must be a scalar")

    if is_extension_array_dtype(values):
        # extension arrays handle fills themselves; at most copy
        return (values.copy() if copy else values), fill_value

    target = values.dtype if dtype is None else dtype
    new_dtype, fill_value = maybe_promote(target, fill_value)
    if new_dtype != values.dtype:
        values = values.astype(new_dtype)
    elif copy:
        values = values.copy()

    return values, fill_value

746 

747 

def invalidate_string_dtypes(dtype_set):
    """Reject string-like dtypes passed to ``DataFrame.select_dtypes()``;
    callers must use 'object' instead.
    """
    string_types = {np.dtype("S").type, np.dtype("<U").type}
    if dtype_set & string_types:
        raise TypeError("string dtypes are not allowed, use 'object' instead")

755 

756 

def coerce_indexer_dtype(indexer, categories):
    """ coerce the indexer input array to the smallest dtype possible """
    length = len(categories)
    # walk the widths from narrowest to widest and stop at the first fit
    for bound, ensure in (
        (_int8_max, ensure_int8),
        (_int16_max, ensure_int16),
        (_int32_max, ensure_int32),
    ):
        if length < bound:
            return ensure(indexer)
    return ensure_int64(indexer)

767 

768 

769def coerce_to_dtypes(result, dtypes): 

770 """ 

771 given a dtypes and a result set, coerce the result elements to the 

772 dtypes 

773 """ 

774 if len(result) != len(dtypes): 

775 raise AssertionError("_coerce_to_dtypes requires equal len arrays") 

776 

777 def conv(r, dtype): 

778 if np.any(isna(r)): 

779 pass 

780 elif dtype == _NS_DTYPE: 

781 r = tslibs.Timestamp(r) 

782 elif dtype == _TD_DTYPE: 

783 r = tslibs.Timedelta(r) 

784 elif dtype == np.bool_: 

785 # messy. non 0/1 integers do not get converted. 

786 if is_integer(r) and r not in [0, 1]: 

787 return int(r) 

788 r = bool(r) 

789 elif dtype.kind == "f": 

790 r = float(r) 

791 elif dtype.kind == "i": 

792 r = int(r) 

793 

794 return r 

795 

796 return [conv(r, dtype) for r, dtype in zip(result, dtypes)] 

797 

798 

def astype_nansafe(arr, dtype, copy: bool = True, skipna: bool = False):
    """
    Cast the elements of an array to a given dtype a nan-safe manner.

    Parameters
    ----------
    arr : ndarray
    dtype : np.dtype
    copy : bool, default True
        If False, a view will be attempted but may fail, if
        e.g. the item sizes don't align.
    skipna: bool, default False
        Whether or not we should skip NaN when casting as a string-type.

    Raises
    ------
    ValueError
        The dtype was a datetime64/timedelta64 dtype, but it had no unit.
    """

    # dispatch on extension dtype if needed
    if is_extension_array_dtype(dtype):
        return dtype.construct_array_type()._from_sequence(arr, dtype=dtype, copy=copy)

    if not isinstance(dtype, np.dtype):
        dtype = pandas_dtype(dtype)

    if issubclass(dtype.type, str):
        # string targets go through a dedicated (NaN-aware) converter
        return lib.astype_str(arr.ravel(), skipna=skipna).reshape(arr.shape)

    elif is_datetime64_dtype(arr):
        if is_object_dtype(dtype):
            return tslib.ints_to_pydatetime(arr.view(np.int64))
        elif dtype == np.int64:
            # NaT has no integer representation; fail loudly
            if isna(arr).any():
                raise ValueError("Cannot convert NaT values to integer")
            return arr.view(dtype)

        # allow frequency conversions
        if dtype.kind == "M":
            return arr.astype(dtype)

        raise TypeError(f"cannot astype a datetimelike from [{arr.dtype}] to [{dtype}]")

    elif is_timedelta64_dtype(arr):
        if is_object_dtype(dtype):
            return tslibs.ints_to_pytimedelta(arr.view(np.int64))
        elif dtype == np.int64:
            # NaT has no integer representation; fail loudly
            if isna(arr).any():
                raise ValueError("Cannot convert NaT values to integer")
            return arr.view(dtype)

        if dtype not in [_INT64_DTYPE, _TD_DTYPE]:

            # allow frequency conversions
            # we return a float here!
            if dtype.kind == "m":
                mask = isna(arr)
                result = arr.astype(dtype).astype(np.float64)
                result[mask] = np.nan
                return result
        elif dtype == _TD_DTYPE:
            return arr.astype(_TD_DTYPE, copy=copy)

        raise TypeError(f"cannot astype a timedelta from [{arr.dtype}] to [{dtype}]")

    elif np.issubdtype(arr.dtype, np.floating) and np.issubdtype(dtype, np.integer):

        # integer dtypes cannot hold NaN/inf; refuse rather than wrap silently
        if not np.isfinite(arr).all():
            raise ValueError("Cannot convert non-finite values (NA or inf) to integer")

    elif is_object_dtype(arr):

        # work around NumPy brokenness, #1987
        if np.issubdtype(dtype.type, np.integer):
            return lib.astype_intsafe(arr.ravel(), dtype).reshape(arr.shape)

        # if we have a datetime/timedelta array of objects
        # then coerce to a proper dtype and recall astype_nansafe

        elif is_datetime64_dtype(dtype):
            from pandas import to_datetime

            return astype_nansafe(to_datetime(arr).values, dtype, copy=copy)
        elif is_timedelta64_dtype(dtype):
            from pandas import to_timedelta

            return astype_nansafe(to_timedelta(arr).values, dtype, copy=copy)

    if dtype.name in ("datetime64", "timedelta64"):
        msg = (
            f"The '{dtype.name}' dtype has no unit. Please pass in "
            f"'{dtype.name}[ns]' instead."
        )
        raise ValueError(msg)

    if copy or is_object_dtype(arr) or is_object_dtype(dtype):
        # Explicit copy, or required since NumPy can't view from / to object.
        return arr.astype(dtype, copy=True)

    return arr.view(dtype)

900 

901 

def maybe_convert_objects(values: np.ndarray, convert_numeric: bool = True):
    """
    If we have an object dtype array, try to coerce dates and/or numbers.

    Parameters
    ----------
    values : ndarray
    convert_numeric : bool, default True
        If True, additionally attempt a hard numeric conversion; otherwise
        only soft conversion is applied after datetimes/timedeltas.

    Returns
    -------
    ndarray or DatetimeIndex
    """
    validate_bool_kwarg(convert_numeric, "convert_numeric")

    # remember the input so we can tell whether any conversion happened
    orig_values = values

    # convert dates
    if is_object_dtype(values.dtype):
        values = lib.maybe_convert_objects(values, convert_datetime=True)

    # convert timedeltas
    if is_object_dtype(values.dtype):
        values = lib.maybe_convert_objects(values, convert_timedelta=True)

    # convert to numeric
    if is_object_dtype(values.dtype):
        if convert_numeric:
            try:
                new_values = lib.maybe_convert_numeric(
                    values, set(), coerce_numeric=True
                )
            except (ValueError, TypeError):
                pass
            else:
                # if we are all nans then leave me alone
                if not isna(new_values).all():
                    values = new_values

        else:
            # soft-conversion
            values = lib.maybe_convert_objects(values)

    # never hand back the caller's own array unchanged
    if values is orig_values:
        values = values.copy()

    return values

949 

950 

951def soft_convert_objects( 

952 values: np.ndarray, 

953 datetime: bool = True, 

954 numeric: bool = True, 

955 timedelta: bool = True, 

956 coerce: bool = False, 

957 copy: bool = True, 

958): 

959 """ if we have an object dtype, try to coerce dates and/or numbers """ 

960 

961 validate_bool_kwarg(datetime, "datetime") 

962 validate_bool_kwarg(numeric, "numeric") 

963 validate_bool_kwarg(timedelta, "timedelta") 

964 validate_bool_kwarg(coerce, "coerce") 

965 validate_bool_kwarg(copy, "copy") 

966 

967 conversion_count = sum((datetime, numeric, timedelta)) 

968 if conversion_count == 0: 

969 raise ValueError("At least one of datetime, numeric or timedelta must be True.") 

970 elif conversion_count > 1 and coerce: 

971 raise ValueError( 

972 "Only one of 'datetime', 'numeric' or " 

973 "'timedelta' can be True when when coerce=True." 

974 ) 

975 

976 if not is_object_dtype(values.dtype): 

977 # If not object, do not attempt conversion 

978 values = values.copy() if copy else values 

979 return values 

980 

981 # If 1 flag is coerce, ensure 2 others are False 

982 if coerce: 

983 # Immediate return if coerce 

984 if datetime: 

985 from pandas import to_datetime 

986 

987 return to_datetime(values, errors="coerce").to_numpy() 

988 elif timedelta: 

989 from pandas import to_timedelta 

990 

991 return to_timedelta(values, errors="coerce").to_numpy() 

992 elif numeric: 

993 from pandas import to_numeric 

994 

995 return to_numeric(values, errors="coerce") 

996 

997 # Soft conversions 

998 if datetime: 

999 # GH 20380, when datetime is beyond year 2262, hence outside 

1000 # bound of nanosecond-resolution 64-bit integers. 

1001 try: 

1002 values = lib.maybe_convert_objects(values, convert_datetime=True) 

1003 except OutOfBoundsDatetime: 

1004 pass 

1005 

1006 if timedelta and is_object_dtype(values.dtype): 

1007 # Object check to ensure only run if previous did not convert 

1008 values = lib.maybe_convert_objects(values, convert_timedelta=True) 

1009 

1010 if numeric and is_object_dtype(values.dtype): 

1011 try: 

1012 converted = lib.maybe_convert_numeric(values, set(), coerce_numeric=True) 

1013 except (ValueError, TypeError): 

1014 pass 

1015 else: 

1016 # If all NaNs, then do not-alter 

1017 values = converted if not isna(converted).all() else values 

1018 values = values.copy() if copy else values 

1019 

1020 return values 

1021 

1022 

def convert_dtypes(
    input_array,
    convert_string: bool = True,
    convert_integer: bool = True,
    convert_boolean: bool = True,
) -> Dtype:
    """
    Convert objects to best possible type, and optionally,
    to types supporting ``pd.NA``.

    Parameters
    ----------
    input_array : ExtensionArray or PandasArray
    convert_string : bool, default True
        Whether object dtypes should be converted to ``StringDtype()``.
    convert_integer : bool, default True
        Whether, if possible, conversion can be done to integer extension types.
    convert_boolean : bool, defaults True
        Whether object dtypes should be converted to ``BooleanDtypes()``.

    Returns
    -------
    dtype
        new dtype
    """
    # extension arrays already carry a "best" dtype; keep them unchanged
    is_extension = is_extension_array_dtype(input_array.dtype)
    if (convert_string or convert_integer or convert_boolean) and not is_extension:
        try:
            inferred_dtype = lib.infer_dtype(input_array)
        except ValueError:
            # Required to catch due to Period. Can remove once GH 23553 is fixed
            inferred_dtype = input_array.dtype

        if not convert_string and is_string_dtype(inferred_dtype):
            # string conversion disabled: keep the existing dtype
            inferred_dtype = input_array.dtype

        if convert_integer:
            target_int_dtype = "Int64"

            if is_integer_dtype(input_array.dtype):
                from pandas.core.arrays.integer import _dtypes

                # map e.g. int32 -> Int32, falling back to Int64
                inferred_dtype = _dtypes.get(input_array.dtype.name, target_int_dtype)
            if not is_integer_dtype(input_array.dtype) and is_numeric_dtype(
                input_array.dtype
            ):
                # other numeric dtypes (e.g. float of integral values)
                inferred_dtype = target_int_dtype

        else:
            if is_integer_dtype(inferred_dtype):
                # integer conversion disabled: keep the existing dtype
                inferred_dtype = input_array.dtype

        if convert_boolean:
            if is_bool_dtype(input_array.dtype):
                inferred_dtype = "boolean"
        else:
            if isinstance(inferred_dtype, str) and inferred_dtype == "boolean":
                # boolean conversion disabled: keep the existing dtype
                inferred_dtype = input_array.dtype

    else:
        inferred_dtype = input_array.dtype

    return inferred_dtype

1086 

1087 

def maybe_castable(arr) -> bool:
    """
    Decide whether ``arr`` can be used on the fastpath without coercion.

    Datetime-like arrays are acceptable only in nanosecond resolution;
    any other dtype is acceptable unless its name is in
    ``_POSSIBLY_CAST_DTYPES``.  Returning False forces the slow,
    coercing path.
    """
    dtype = arr.dtype
    if dtype.kind == "M":
        # datetime64: only the [ns] unit may be used as-is
        return is_datetime64_ns_dtype(dtype)
    if dtype.kind == "m":
        # timedelta64: only the [ns] unit may be used as-is
        return is_timedelta64_ns_dtype(dtype)
    return dtype.name not in _POSSIBLY_CAST_DTYPES

1100 

1101 

def maybe_infer_to_datetimelike(value, convert_dates: bool = False):
    """
    we might have a array (or single object) that is datetime like,
    and no dtype is passed don't change the value unless we find a
    datetime/timedelta set

    this is pretty strict in that a datetime/timedelta is REQUIRED
    in addition to possible nulls/string likes

    Parameters
    ----------
    value : np.array / Series / Index / list-like
    convert_dates : bool, default False
        if True try really hard to convert dates (such as datetime.date), other
        leave inferred dtype 'date' alone

    Returns
    -------
    the original ``value`` unchanged, or a datetime64/timedelta64
    ndarray (or DatetimeIndex for same-tz aware datetimes) when a
    conversion succeeded.
    """

    # TODO: why not timedelta?
    # Datetime/period indexes and arrays are already datetimelike: passthrough.
    if isinstance(
        value, (ABCDatetimeIndex, ABCPeriodIndex, ABCDatetimeArray, ABCPeriodArray)
    ):
        return value
    elif isinstance(value, ABCSeries):
        if isinstance(value._values, ABCDatetimeIndex):
            return value._values

    v = value

    # normalize scalars to a 1-element ndarray so inference below works
    if not is_list_like(v):
        v = [v]
    v = np.array(v, copy=False)

    # we only care about object dtypes
    if not is_object_dtype(v):
        return value

    # remember the original shape; inference operates on a flattened view
    shape = v.shape
    if not v.ndim == 1:
        v = v.ravel()

    # empty input: nothing to infer
    if not len(v):
        return value

    def try_datetime(v):
        # safe coerce to datetime64; on any failure return v reshaped,
        # i.e. effectively unchanged
        try:
            # GH19671
            v = tslib.array_to_datetime(v, require_iso8601=True, errors="raise")[0]
        except ValueError:

            # we might have a sequence of the same-datetimes with tz's
            # if so coerce to a DatetimeIndex; if they are not the same,
            # then these stay as object dtype, xref GH19671
            from pandas._libs.tslibs import conversion
            from pandas import DatetimeIndex

            try:

                values, tz = conversion.datetime_to_datetime64(v)
                return DatetimeIndex(values).tz_localize("UTC").tz_convert(tz=tz)
            except (ValueError, TypeError):
                pass

        except Exception:
            # deliberately best-effort: any other failure leaves v unconverted
            pass

        return v.reshape(shape)

    def try_timedelta(v):
        # safe coerce to timedelta64; on failure return v unchanged

        # will try first with a string & object conversion
        from pandas import to_timedelta

        try:
            return to_timedelta(v)._ndarray_values.reshape(shape)
        except ValueError:
            return v.reshape(shape)

    inferred_type = lib.infer_datetimelike_array(ensure_object(v))

    if inferred_type == "date" and convert_dates:
        value = try_datetime(v)
    elif inferred_type == "datetime":
        value = try_datetime(v)
    elif inferred_type == "timedelta":
        value = try_timedelta(v)
    elif inferred_type == "nat":

        # if all NaT, return as datetime
        if isna(v).all():
            value = try_datetime(v)
        else:

            # We have at least a NaT and a string
            # try timedelta first to avoid spurious datetime conversions
            # e.g. '00:00:01' is a timedelta but technically is also a datetime
            value = try_timedelta(v)
            if lib.infer_dtype(value, skipna=False) in ["mixed"]:
                # cannot skip missing values, as NaT implies that the string
                # is actually a datetime
                value = try_datetime(v)

    return value

1207 

1208 

def maybe_cast_to_datetime(value, dtype, errors: str = "raise"):
    """
    try to cast the array/value to a datetimelike dtype, converting float
    nan to iNaT

    Parameters
    ----------
    value : scalar or array-like
        The data to cast.
    dtype : np.dtype, DatetimeTZDtype, str or None
        Target dtype.  When None, datetime-like inference is attempted
        on the value itself.
    errors : str, default "raise"
        Passed through to ``to_datetime`` / ``to_timedelta``.

    Raises
    ------
    ValueError
        If a unit-less ``datetime64`` / ``timedelta64`` dtype is passed.
    TypeError
        If the dtype cannot hold datetimelike data.
    OutOfBoundsDatetime
        Propagated from conversion of out-of-bounds values.
    """
    from pandas.core.tools.timedeltas import to_timedelta
    from pandas.core.tools.datetimes import to_datetime

    if dtype is not None:
        if isinstance(dtype, str):
            dtype = np.dtype(dtype)

        is_datetime64 = is_datetime64_dtype(dtype)
        is_datetime64tz = is_datetime64tz_dtype(dtype)
        is_timedelta64 = is_timedelta64_dtype(dtype)

        if is_datetime64 or is_datetime64tz or is_timedelta64:

            # Force the dtype if needed.
            msg = (
                f"The '{dtype.name}' dtype has no unit. "
                f"Please pass in '{dtype.name}[ns]' instead."
            )

            if is_datetime64 and not is_dtype_equal(dtype, _NS_DTYPE):

                # pandas supports dtype whose granularity is less than [ns]
                # e.g., [ps], [fs], [as]
                # (dtype <= M8[ns] is numpy's "can safely cast" ordering)
                if dtype <= np.dtype("M8[ns]"):
                    if dtype.name == "datetime64":
                        # unit-less dtype: ambiguous, refuse it
                        raise ValueError(msg)
                    dtype = _NS_DTYPE
                else:
                    raise TypeError(f"cannot convert datetimelike to dtype [{dtype}]")
            elif is_datetime64tz:

                # our NaT doesn't support tz's
                # this will coerce to DatetimeIndex with
                # a matching dtype below
                if is_scalar(value) and isna(value):
                    value = [value]

            elif is_timedelta64 and not is_dtype_equal(dtype, _TD_DTYPE):

                # pandas supports dtype whose granularity is less than [ns]
                # e.g., [ps], [fs], [as]
                if dtype <= np.dtype("m8[ns]"):
                    if dtype.name == "timedelta64":
                        # unit-less dtype: ambiguous, refuse it
                        raise ValueError(msg)
                    dtype = _TD_DTYPE
                else:
                    raise TypeError(f"cannot convert timedeltalike to dtype [{dtype}]")

            if is_scalar(value):
                # scalar NaN-like becomes the sentinel iNaT
                if value == iNaT or isna(value):
                    value = iNaT
            else:
                value = np.array(value, copy=False)

                # have a scalar array-like (e.g. NaT)
                if value.ndim == 0:
                    value = iNaT

                # we have an array of datetime or timedeltas & nulls
                elif np.prod(value.shape) or not is_dtype_equal(value.dtype, dtype):
                    try:
                        if is_datetime64:
                            value = to_datetime(value, errors=errors)
                            # GH 25843: Remove tz information since the dtype
                            # didn't specify one
                            if value.tz is not None:
                                value = value.tz_localize(None)
                            value = value._values
                        elif is_datetime64tz:
                            # The string check can be removed once issue #13712
                            # is solved. String data that is passed with a
                            # datetime64tz is assumed to be naive which should
                            # be localized to the timezone.
                            is_dt_string = is_string_dtype(value)
                            value = to_datetime(value, errors=errors).array
                            if is_dt_string:
                                # Strings here are naive, so directly localize
                                value = value.tz_localize(dtype.tz)
                            else:
                                # Numeric values are UTC at this point,
                                # so localize and convert
                                value = value.tz_localize("UTC").tz_convert(dtype.tz)
                        elif is_timedelta64:
                            value = to_timedelta(value, errors=errors)._values
                    except OutOfBoundsDatetime:
                        raise
                    except (AttributeError, ValueError, TypeError):
                        # best-effort: leave value unconverted on failure
                        pass

        # coerce datetimelike to object
        elif is_datetime64_dtype(value) and not is_datetime64_dtype(dtype):
            if is_object_dtype(dtype):
                if value.dtype != _NS_DTYPE:
                    value = value.astype(_NS_DTYPE)
                # view as i8 and expand into an object array of pydatetimes
                ints = np.asarray(value).view("i8")
                return tslib.ints_to_pydatetime(ints)

            # we have a non-castable dtype that was passed
            raise TypeError(f"Cannot cast datetime64 to {dtype}")

    else:

        is_array = isinstance(value, np.ndarray)

        # catch a datetime/timedelta that is not of ns variety
        # and no coercion specified
        if is_array and value.dtype.kind in ["M", "m"]:
            dtype = value.dtype

            if dtype.kind == "M" and dtype != _NS_DTYPE:
                value = tslibs.conversion.ensure_datetime64ns(value)

            elif dtype.kind == "m" and dtype != _TD_DTYPE:
                value = to_timedelta(value)

        # only do this if we have an array and the dtype of the array is not
        # setup already we are not an integer/object, so don't bother with this
        # conversion
        elif not (
            is_array
            and not (
                issubclass(value.dtype.type, np.integer) or value.dtype == np.object_
            )
        ):
            value = maybe_infer_to_datetimelike(value)

    return value

1340 

1341 

def find_common_type(types):
    """
    Find a common data type among the given dtypes.

    Parameters
    ----------
    types : list of dtypes

    Returns
    -------
    pandas extension or numpy dtype

    Raises
    ------
    ValueError
        If ``types`` is empty.

    See Also
    --------
    numpy.find_common_type
    """
    if len(types) == 0:
        raise ValueError("no types given")

    first = types[0]

    # workaround for find_common_type([np.dtype('datetime64[ns]')] * 2)
    # => object
    if all(is_dtype_equal(first, t) for t in types[1:]):
        return first

    # Any extension dtype in the mix falls back to object.
    # NB: the builtin ``object`` is returned here, not ``np.object``:
    # ``np.object`` was merely a deprecated alias for the builtin and was
    # removed in numpy >= 1.24, so using it raises AttributeError there.
    # Returning ``object`` is behavior-identical on older numpy.
    if any(isinstance(t, ExtensionDtype) for t in types):
        return object

    # take lowest unit
    if all(is_datetime64_dtype(t) for t in types):
        return np.dtype("datetime64[ns]")
    if all(is_timedelta64_dtype(t) for t in types):
        return np.dtype("timedelta64[ns]")

    # don't mix bool / int or float or complex
    # this is different from numpy, which casts bool with float/int as int
    has_bools = any(is_bool_dtype(t) for t in types)
    if has_bools:
        for t in types:
            if is_integer_dtype(t) or is_float_dtype(t) or is_complex_dtype(t):
                return object

    # NOTE: np.find_common_type is deprecated in numpy >= 1.25; a future
    # change should migrate this to np.result_type / np.promote_types.
    return np.find_common_type(types, [])

1388 

1389 

def cast_scalar_to_array(shape, value, dtype=None):
    """
    Create np.ndarray of specified shape and dtype, filled with values.

    Parameters
    ----------
    shape : tuple
    value : scalar value
    dtype : np.dtype, optional
        dtype to coerce

    Returns
    -------
    ndarray of shape, filled with value, of specified / inferred dtype
    """
    if dtype is None:
        # no dtype given: infer it (and a possibly-coerced fill value)
        # from the scalar itself
        dtype, fill_value = infer_dtype_from_scalar(value)
    else:
        fill_value = value

    out = np.empty(shape, dtype=dtype)
    out.fill(fill_value)
    return out

1416 

1417 

def construct_1d_arraylike_from_scalar(value, length: int, dtype):
    """
    create a np.ndarray / pandas type of specified shape and dtype
    filled with values

    Parameters
    ----------
    value : scalar value
    length : int
    dtype : pandas_dtype / np.dtype

    Returns
    -------
    np.ndarray / pandas type of length, filled with value
    """
    if is_extension_array_dtype(dtype):
        # delegate construction to the extension array type itself
        array_type = dtype.construct_array_type()
        return array_type._from_sequence([value] * length, dtype=dtype)

    # unwrap pandas dtype objects down to the underlying numpy dtype
    if not isinstance(dtype, (np.dtype, type(np.dtype))):
        dtype = dtype.dtype

    if length and is_integer_dtype(dtype) and isna(value):
        # coerce if we have nan for an integer dtype
        dtype = np.dtype("float64")
    elif isinstance(dtype, np.dtype) and dtype.kind in ("U", "S"):
        # coerce fixed-width string dtypes to object so that numpy
        # treats our string as a scalar value instead of truncating it
        dtype = object
        if not isna(value):
            value = ensure_str(value)

    out = np.empty(length, dtype=dtype)
    out.fill(value)
    return out

1456 

1457 

def construct_1d_object_array_from_listlike(values):
    """
    Transform any list-like object in a 1-dimensional numpy array of object
    dtype.

    Parameters
    ----------
    values : any iterable which has a len()

    Raises
    ------
    TypeError
        * If `values` does not have a len()

    Returns
    -------
    1-dimensional numpy array of dtype object
    """
    # Assigning into a pre-allocated object array sidesteps numpy's
    # attempt to interpret nested list-likes as additional dimensions.
    out = np.empty(len(values), dtype="object")
    out[:] = values
    return out

1481 

1482 

def construct_1d_ndarray_preserving_na(values, dtype=None, copy: bool = False):
    """
    Construct a new ndarray, coercing `values` to `dtype`, preserving NA.

    Parameters
    ----------
    values : Sequence
    dtype : numpy.dtype, optional
    copy : bool, default False
        Note that copies may still be made with ``copy=False`` if casting
        is required.

    Returns
    -------
    arr : ndarray[dtype]

    Examples
    --------
    >>> np.array([1.0, 2.0, None], dtype='str')
    array(['1.0', '2.0', 'None'], dtype='<U4')

    >>> construct_1d_ndarray_preserving_na([1.0, 2.0, None], dtype=np.dtype('str'))
    array(['1.0', '2.0', None], dtype=object)
    """
    result = np.array(values, dtype=dtype, copy=copy)

    if dtype is not None and dtype.kind in ("U", "S"):
        # GH-21083: we can't just return np.array(values, dtype='str'),
        # since NumPy stringifies the NA values along with everything
        # else.  Instead go string -> object and restore the original
        # NA entries with a second pass over the data.
        mask = isna(values)
        as_object = result.astype(object)
        as_object[mask] = np.asarray(values, dtype=object)[mask]
        result = as_object

    return result

1522 

1523 

def maybe_cast_to_integer_array(arr, dtype, copy: bool = False):
    """
    Takes any dtype and returns the casted version, raising for when data is
    incompatible with integer/unsigned integer dtypes.

    .. versionadded:: 0.24.0

    Parameters
    ----------
    arr : array-like
        The array to cast.
    dtype : str, np.dtype
        The integer dtype to cast the array to.
    copy: bool, default False
        Whether to make a copy of the array before returning.

    Returns
    -------
    int_arr : ndarray
        An array of integer or unsigned integer dtype

    Raises
    ------
    OverflowError : the dtype is incompatible with the data
    ValueError : loss of precision has occurred during casting

    Examples
    --------
    If you try to coerce negative values to unsigned integers, it raises:

    >>> Series([-1], dtype="uint64")
    Traceback (most recent call last):
        ...
    OverflowError: Trying to coerce negative values to unsigned integers

    Also, if you try to coerce float values to integers, it raises:

    >>> Series([1, 2, 3.5], dtype="int64")
    Traceback (most recent call last):
        ...
    ValueError: Trying to coerce float values to integers
    """
    try:
        casted = (
            arr.astype(dtype, copy=copy)
            if hasattr(arr, "astype")
            else np.array(arr, dtype=dtype, copy=copy)
        )
    except OverflowError:
        raise OverflowError(
            "The elements provided in the data cannot all be "
            f"casted to the dtype {dtype}"
        )

    # A lossless round-trip means the cast is safe.
    if np.array_equal(arr, casted):
        return casted

    # We do this casting to allow for proper
    # data and dtype checking.
    #
    # We didn't do this earlier because NumPy
    # doesn't handle `uint64` correctly.
    arr = np.asarray(arr)

    if is_unsigned_integer_dtype(dtype) and (arr < 0).any():
        raise OverflowError("Trying to coerce negative values to unsigned integers")

    if is_integer_dtype(dtype) and (is_float_dtype(arr) or is_object_dtype(arr)):
        raise ValueError("Trying to coerce float values to integers")