Coverage for src/configuraptor/core.py: 100%

243 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2026-02-11 11:46 +0100

1""" 

2Contains most of the loading logic. 

3""" 

4 

5import dataclasses as dc 

6import io 

7import os 

8import typing 

9import warnings 

10from pathlib import Path 

11from typing import Any, Type 

12 

13import requests 

14from dotenv import dotenv_values as _dotenv_values 

15from dotenv import find_dotenv 

16 

17from . import loaders 

18from .abs import DEFAULT_ENV_SETTING, AnyType, C, T, T_data, Type_C, UseEnvSetting 

19from .alias import Alias, has_alias 

20from .binary_config import BinaryConfig 

21from .errors import ( 

22 ConfigErrorCouldNotConvert, 

23 ConfigErrorInvalidType, 

24 ConfigErrorMissingKey, 

25 FailedToLoad, 

26) 

27from .helpers import ( 

28 all_annotations, 

29 camel_to_snake, 

30 check_type, 

31 dataclass_field, 

32 expand_env_vars_into_toml_values, 

33 find_pyproject_toml, 

34 is_custom_class, 

35 is_optional, 

36 is_parameterized, 

37 is_union, 

38) 

39from .postpone import Postponed 

40from .type_converters import CONVERTERS 

41 

42 

43def _data_for_nested_key(key: str, raw: dict[str, typing.Any]) -> dict[str, typing.Any]: 

44 """ 

45 If a key contains a dot, traverse the raw dict until the right key was found. 

46 

47 Example: 

48 key = some.nested.key 

49 raw = {"some": {"nested": {"key": {"with": "data"}}}} 

50 -> {"with": "data"} 

51 """ 

52 parts = key.split(".") 

53 while parts: 

54 key = parts.pop(0) 

55 if key not in raw: 

56 return {} 

57 

58 raw = raw[key] 

59 

60 return raw 

61 

62 

def _guess_key(clsname: str) -> str:
    """
    Derive the default config key for a class from its name.

    Used when no key is manually defined for `load_into`: the class name is passed through `camel_to_snake`.
    """
    default_key = camel_to_snake(clsname)
    return default_key

69 

70 

71def _from_mock_url(url: str) -> str: 

72 """ 

73 Pytest only: when starting a url with mock:// it is expected to just be json afterwards. 

74 """ 

75 return url.removeprefix("mock://") 

76 

77 

def guess_filetype_for_url(url: str, response: requests.Response = None) -> str:
    """
    Try to guess the right filetype (-> loader, e.g. json or yaml) for a url.

    Two hints are used, in this order:
    1. the extension of the path in the url (query string, fragment and host name are ignored);
    2. the subtype of the response's content-type header (e.g. 'application/yaml' -> 'yaml'),
       unless that subtype is 'plain', which says nothing about the actual format.

    Args:
        url: the location the config is loaded from.
        response: optional response for that url; only its `headers` are inspected.

    Returns:
        The guessed filetype without a leading dot, lowercased when it comes from the url.
        Falls back to "json" if none can be found.
    """
    # the query string and fragment are not part of the file name
    location = url.split("?")[0].split("#")[0]

    # Only the path may supply an extension: otherwise the dot in the host name
    # would turn e.g. "https://example.com" into filetype "com".
    _, has_scheme, remainder = location.partition("://")
    if has_scheme:
        location = remainder.partition("/")[2]

    if url_extension := os.path.splitext(location)[1].lower():
        return url_extension.strip(".")

    if response and (content_type_header := response.headers.get("content-type", "").split(";")[0].strip()):
        # 'application/yaml; charset=utf-8' -> 'yaml'
        content_type = content_type_header.split("/")[-1]
        if content_type != "plain":
            return content_type

    # If both methods fail, default to JSON
    return "json"

96 

97 

def from_url(url: str, _dummy: bool = False) -> tuple[io.BytesIO, str]:
    """
    Fetch the contents behind `url` and return them as a binary file-like object, together with the file type.

    Three sources are supported:
    - a mock:// url, where the payload is embedded in the url itself (no request is made);
    - `_dummy=True`, which yields an empty json object (no request is made);
    - anything else, which is downloaded with `requests` (10 second timeout).
      Certificate verification is on unless the SSL_VERIFY environment variable is set to something other than "1".

    The result can be used by __load_data:
    > loader = loaders.get(filetype)
    > # dev/null exists but always returns b''
    > data = loader(contents, Path("/dev/null"))
    """
    response = None
    if url.startswith("mock://"):
        text = _from_mock_url(url)
    elif _dummy:
        text = "{}"
    else:
        verify_certificates = os.getenv("SSL_VERIFY", "1") == "1"
        response = requests.get(url, timeout=10, verify=verify_certificates)
        text = response.text

    filetype = guess_filetype_for_url(url, response)
    contents = io.BytesIO(text.encode())
    return contents, filetype

121 

122 

def dotenv_values() -> dict[str, str | None]:
    """Wrapper around dotenv.dotenv_values that reads the .env file `find_dotenv` locates from the cwd."""
    env_file = find_dotenv(usecwd=True)
    return _dotenv_values(dotenv_path=env_file)

126 

127 

def apply_env(data: dict[str, typing.Any], use_env: UseEnvSetting) -> None:
    """
    Build the environment that matches `use_env` and expand it into `data` (in place).

    - "yes": .env merged with os.environ, where os.environ wins on duplicate names;
    - "inverse": os.environ merged with .env, where .env wins on duplicate names;
    - "dotenv": only .env;
    - "environ": only (a copy of) os.environ;
    - anything else: `data` is left alone.
    """
    if use_env == "yes":
        env = dotenv_values() | os.environ
    elif use_env == "inverse":
        env = os.environ | dotenv_values()
    elif use_env == "dotenv":
        env = dotenv_values()
    elif use_env == "environ":
        env = {**os.environ}
    else:  # pragma: no cover
        return

    expand_env_vars_into_toml_values(data, env)

145 

146 

def _load_data(
    data: T_data,
    key: str = None,
    classname: str = None,
    lower_keys: bool = False,
    allow_types: tuple[type, ...] = (dict,),
    strict: bool = False,
    use_env: UseEnvSetting = DEFAULT_ENV_SETTING,
) -> dict[str, typing.Any]:
    """
    Tries to load the right data from a filename/path, url, dict, bytes or list of sources, \
    based on a manual key or a classname.

    E.g. class Tool will be mapped to key tool.
    It also deals with nested keys (tool.extra -> {"tool": {"extra": ...}})

    Args:
        data: what to load:
            - bytes are returned untouched;
            - a list is loaded item by item via `load_data` and merged, later items overwrite earlier ones;
            - a str starting with http://, https:// or mock:// is fetched via `from_url`,
              any other str is treated as a file path;
            - a Path is opened and parsed by the loader registered for its suffix (or its name without suffix);
            - anything else is used as-is.
        key: (nested) key to select from the loaded data. When None, it is guessed: the only top-level key
            if there is exactly one, otherwise the snake_case version of `classname` (if given).
            A falsy key that is not None (e.g. "") disables both guessing and selecting.
        classname: name of the class the data is loaded for, only used to guess `key`.
        lower_keys: lowercase the (top-level) keys of the resulting dict.
        allow_types: types the selected data is allowed to have.
        strict: not used here directly, only forwarded to `load_data` for list input.
        use_env: env-setting passed to `apply_env`; "no" skips that step.

    Returns:
        The selected data; an empty dict if the source itself is empty.

    Raises:
        ValueError: for an empty list, if nothing is found at `key`, or if the data is not one of `allow_types`.
    """
    if isinstance(data, bytes):
        # instantly return, don't modify
        # bytes as inputs -> bytes as output
        # but since `T_data` is re-used, that's kind of hard to type for mypy.
        return data  # type: ignore

    if isinstance(data, list):
        if not data:
            raise ValueError("Empty list passed!")

        final_data: dict[str, typing.Any] = {}
        for source in data:
            # each source goes through the public wrapper, so it gets its own retry/fallback handling.
            # NOTE(review): lower_keys is always True here, regardless of the `lower_keys` argument;
            # presumably so keys of different sources can overwrite each other - confirm this is intended.
            final_data |= load_data(
                source,
                key=key,
                classname=classname,
                lower_keys=True,
                allow_types=allow_types,
                strict=strict,
                use_env=use_env,
            )

        return final_data

    if isinstance(data, str):
        if data.startswith(("http://", "https://", "mock://")):
            contents, filetype = from_url(data)

            loader = loaders.get(filetype)
            # dev/null exists but always returns b''
            data = loader(contents, Path("/dev/null"))
        else:
            # not a url, so it must be a file path; handled by the Path branch below
            data = Path(data)

    if isinstance(data, Path):
        with data.open("rb") as f:
            # files without a suffix are looked up by their full name instead
            loader = loaders.get(data.suffix or data.name)
            data = loader(f, data.resolve())

    if not data:
        # empty source: nothing to select from
        return {}

    if key is None:
        # try to guess key by grabbing the first one or using the class name
        if len(data) == 1:
            key = next(iter(data.keys()))
        elif classname is not None:
            key = _guess_key(classname)

    if key:
        data = _data_for_nested_key(key, data)

    if not data:
        # the source had contents, but nothing (useful) at the requested key
        raise ValueError("No data found!")

    if not isinstance(data, allow_types):
        raise ValueError(f"Data should be one of {allow_types} but it is {type(data)}!")

    if lower_keys and isinstance(data, dict):
        # only the top level is lowercased, nested dicts are left alone
        data = {k.lower(): v for k, v in data.items()}

    if use_env != "no" and isinstance(data, dict):
        # modifies `data` in place
        apply_env(data, use_env)

    return typing.cast(dict[str, typing.Any], data)

227 

228 

def load_data(
    data: T_data,
    key: str = None,
    classname: str = None,
    lower_keys: bool = False,
    allow_types: tuple[type, ...] = (dict,),
    strict: bool = False,
    use_env: UseEnvSetting = DEFAULT_ENV_SETTING,
) -> dict[str, typing.Any]:
    """
    Wrapper around `_load_data` that retries with key="" if anything goes wrong.

    Without `data`, the result of `find_pyproject_toml` is used as the source.
    If the retry with key="" fails as well, `strict` decides what happens:
    `FailedToLoad` is raised when it is set, otherwise a warning is issued and an empty dict is returned.
    """
    if data is None:
        # no source given: fall back to pyproject.toml
        data = find_pyproject_toml()

    try:
        return _load_data(
            data,
            key,
            classname,
            lower_keys=lower_keys,
            allow_types=allow_types,
            strict=strict,
            use_env=use_env,
        )
    except Exception as e:
        # note: `key == ""` is NOT the same as `not key`, since None still gets a second chance
        if key == "":
            # this already was the second attempt, give up
            if strict:
                raise FailedToLoad(data) from e

            # e.g. if settings are to be loaded via a URL that is unavailable or returns invalid json
            warnings.warn(f"Data ('{data!r}') could not be loaded", source=e, category=UserWarning)
            return {}

        # first attempt failed: try again with key ""
        return load_data(
            data,
            "",
            classname,
            lower_keys=lower_keys,
            allow_types=allow_types,
            strict=strict,
            use_env=use_env,
        )

275 

276 

# Type variable for the type a value is converted *from* (used in the signature of `convert_between`).
F = typing.TypeVar("F")

278 

279 

def convert_between(from_value: F, from_type: Type[F], to_type: Type[T]) -> T:
    """
    Convert `from_value` (of type `from_type`) into `to_type`.

    A converter registered in `CONVERTERS` for this exact (from, to) pair takes precedence;
    without one, `to_type` itself is called with the value.
    """
    converter = CONVERTERS.get((from_type, to_type))
    if not converter:
        # no dedicated converter: just convert type
        return to_type(from_value)  # type: ignore

    return typing.cast(T, converter(from_value))

289 

290 

def check_and_convert_type(value: Any, _type: Type[T], convert_types: bool, key: str = "variable") -> T:
    """
    Make sure `value` is of type `_type`, converting it when that is allowed.

    Args:
        value (Any): The value to be checked and potentially converted.
        _type (Type[T]): The expected type for the value.
        convert_types (bool): If True, a value of the wrong type is converted instead of rejected.
        key (str, optional): The name or key associated with the variable (used in error messages).
            Defaults to "variable".

    Returns:
        T: The value itself when its type matches, None for an unresolved alias of an optional type,
            or the converted value.

    Raises:
        ConfigErrorInvalidType: If the type does not match and conversion is not allowed,
            or if the value is an unresolved alias for a non-optional type.
        ConfigErrorCouldNotConvert: If type conversion fails.
    """
    if check_type(value, _type):
        # nothing to do, type matches
        return value

    if isinstance(value, Alias):
        if not is_optional(_type):
            # unresolved alias, error should've already been thrown for parent but lets do it again:
            raise ConfigErrorInvalidType(value.to, value=value, expected_type=_type)

        return typing.cast(T, None)

    if not convert_types:
        # type does not match and should not be converted
        raise ConfigErrorInvalidType(key, value=value, expected_type=_type)

    # type does not match, try to convert it
    actual_type = type(value)
    try:
        converted = convert_between(value, actual_type, _type)
    except (TypeError, ValueError) as e:
        raise ConfigErrorCouldNotConvert(actual_type, _type, value) from e

    return converted

329 

330 

def ensure_types(
    data: dict[str, T],
    annotations: dict[str, type[T]],
    convert_types: bool = False,
) -> dict[str, T | None]:
    """
    Check (and optionally convert) every annotated value in 'data' against its type in 'annotations'.

    Postponed values are left out of the result, as are aliases that point to a postponed value.
    An alias whose target is present in 'data' is replaced by the value of that target before checking.

    TODO: python 3.11 exception groups to throw multiple errors at once!
    """
    # unique marker for 'key is absent'; None can't be used since it is a valid value for typing.Optional.
    # (cast to T to make mypy happy)
    missing = typing.cast(T, object())

    checked: dict[str, T | None] = {}
    for name, expected_type in annotations.items():
        current = data.get(name, missing)
        if current is missing:  # pragma: nocover
            warnings.warn("This should not happen since `load_recursive` already fills `data` based on `annotations`")
            # skip!
            continue

        if isinstance(current, Postponed):
            # don't do anything with this item!
            continue

        if isinstance(current, Alias):
            target = data.get(current.to, missing)
            if isinstance(target, Postponed):
                # an alias of a postponed item is postponed as well
                continue

            if target is not missing:
                # original key set, use its value for the alias
                current = target

        checked[name] = check_and_convert_type(current, expected_type, convert_types, name)

    return checked

374 

375 

def convert_key(key: str) -> str:
    """
    Turn a config key into a valid property name by mapping every '-' and '.' to '_'.
    """
    separators_to_underscore = str.maketrans("-.", "__")
    return key.translate(separators_to_underscore)

381 

382 

def convert_config(items: dict[str, T]) -> dict[str, T]:
    """
    Clean up the config dict (from toml) or 'overwrites' dict before it is mapped to a Config class.

    - items with None as value are dropped, so the default is used for those;
    - the remaining keys are passed through `convert_key` ('-' and '.' become '_').
    """
    converted: dict[str, T] = {}
    for name, value in items.items():
        if value is None:
            continue

        converted[convert_key(name)] = value

    return converted

391 

392 

def load_recursive(
    cls: AnyType,
    data: dict[str, T],
    annotations: dict[str, AnyType],
    convert_types: bool = False,
) -> dict[str, T]:
    """
    For all annotations (recursively gathered from parents with `all_annotations`),
    try to resolve the tree of annotations.

    Uses `load_into_recurse`, not itself directly.

    Example:
        class First:
            key: str

        class Second:
            other: First

        # step 1
        cls = Second
        data = {"second": {"other": {"key": "anything"}}}
        annotations: {"other": First}

        # step 1.5
        data = {"other": {"key": "anything"}}
        annotations: {"other": First}

        # step 2
        cls = First
        data = {"key": "anything"}
        annotations: {"key": str}

    Args:
        cls: the class the data is loaded for; consulted for aliases, class-level defaults and dataclass fields.
        data: the raw values, by annotation name.
        annotations: annotation name -> annotated type, for every property that should be filled.
        convert_types: forwarded to `_load_into_recurse` for nested custom classes.

    Returns:
        A new dict with one entry per annotation: the (possibly nested-loaded) value from `data`,
        or a fallback (alias, class default, None for optionals, dataclass default factory) if the key is absent.

    Raises:
        ConfigErrorMissingKey: if a key is absent from `data` and none of the fallbacks apply.

    TODO: python 3.11 exception groups to throw multiple errors at once!
    """
    updated = {}

    for _key, _type in annotations.items():
        if _key in data:
            value: typing.Any = data[_key]  # value can change so define it as any instead of T
            if is_parameterized(_type):
                origin = typing.get_origin(_type)
                arguments = typing.get_args(_type)
                if origin is list and arguments and is_custom_class(arguments[0]):
                    # e.g. list[Point]: load every item into the custom class
                    subtype = arguments[0]
                    value = [_load_into_recurse(subtype, subvalue, convert_types=convert_types) for subvalue in value]

                elif origin is dict and arguments and is_custom_class(arguments[1]):
                    # e.g. dict[str, Point]
                    subkeytype, subvaluetype = arguments
                    # subkey(type) is not a custom class, so don't try to convert it:
                    value = {
                        subkey: _load_into_recurse(subvaluetype, subvalue, convert_types=convert_types)
                        for subkey, subvalue in value.items()
                    }
                # elif origin is dict:
                # keep data the same
                elif is_union(_type) and arguments:
                    # e.g. Point | None: load into each custom class in the union.
                    # NOTE(review): with more than one custom class in the union, the result of the first load
                    # is fed into the next one - confirm that is intended.
                    for arg in arguments:
                        if is_custom_class(arg):
                            value = _load_into_recurse(arg, value, convert_types=convert_types)

            elif is_custom_class(_type):
                # type must be C (custom class) at this point; includes dataclass but not optional[cls]
                value = _load_into_recurse(
                    # make mypy and pycharm happy by telling it _type is of type C...
                    # actually just passing _type as first arg!
                    typing.cast(Type_C[typing.Any], _type),
                    value,
                    convert_types=convert_types,
                )

            # else: normal value, don't change

        # key is not in data: try the fallbacks, in order of priority
        elif value := has_alias(cls, _key, data):
            # value updated by alias
            # (only a truthy result of `has_alias` is accepted here, otherwise the next fallback is tried)
            ...
        elif _key in cls.__dict__:
            # property has default, use that instead.
            value = cls.__dict__[_key]
        elif is_optional(_type):
            # type is optional and not found in __dict__ -> default is None
            value = None
        elif dc.is_dataclass(cls) and (field := dataclass_field(cls, _key)) and field.default_factory is not dc.MISSING:
            # could have a default factory
            # todo: do something with field.default?
            value = field.default_factory()
        else:
            raise ConfigErrorMissingKey(_key, cls, _type)

        updated[_key] = value

    return updated

487 

488 

def check_and_convert_data(
    cls: typing.Type[C],
    data: dict[str, typing.Any],
    _except: typing.Iterable[str],
    strict: bool = True,
    convert_types: bool = False,
) -> dict[str, typing.Any]:
    """
    Prepare `data` for `load_into_recurse`, based on the annotations of `cls` (minus those in `_except`).

    Steps:
    1. config-keys are converted to python compatible config_keys (and None values are dropped);
    2. custom class type annotations are loaded with the same logic (see also `load_recursive`);
    3. only when `strict`: the annotated types are compared to the actual types after loading the config file.
    """
    annotations = all_annotations(cls, _except=_except)

    prepared = load_recursive(cls, convert_config(data), annotations, convert_types=convert_types)
    if not strict:
        return prepared

    return ensure_types(prepared, annotations, convert_types=convert_types)

512 

513 

# Extra arguments for a class' __init__, as accepted by the `init` option of the load functions:
# positional arguments only ...
T_init_list = list[typing.Any]
# ... keyword arguments only ...
T_init_dict = dict[str, typing.Any]
# ... or a tuple of both (or nothing at all); normalised by `_split_init`.
T_init = tuple[T_init_list, T_init_dict] | T_init_list | T_init_dict | None

517 

518 

519@typing.no_type_check # (mypy doesn't understand 'match' fully yet) 

520def _split_init(init: T_init) -> tuple[T_init_list, T_init_dict]: 

521 """ 

522 Accept a tuple, a dict or a list of (arg, kwarg), {kwargs: ...}, [args] respectively and turn them all into a tuple. 

523 """ 

524 if not init: 

525 return [], {} 

526 

527 args: T_init_list = [] 

528 kwargs: T_init_dict = {} 

529 match init: 

530 case (args, kwargs): 

531 return args, kwargs 

532 case [*args]: 

533 return args, {} 

534 case {**kwargs}: 

535 return [], kwargs 

536 case _: 

537 raise ValueError("Init must be either a tuple of list and dict, a list or a dict.") 

538 

539 

def _load_into_recurse(
    cls: typing.Type[C],
    data: dict[str, typing.Any] | bytes,
    init: T_init = None,
    strict: bool = True,
    convert_types: bool = False,
) -> C:
    """
    Create an instance of `cls` and fill it with `data`.

    How the instance is built depends on `cls` and `data`:
    - bytes and/or a BinaryConfig class: parsing is delegated to `cls._parse_into`;
    - a dataclass: the checked data is passed to its constructor (`init` is not allowed here);
    - data that already is an instance of `cls`: returned as-is;
    - any other class: constructed with `init`, after which the checked data is written to its `__dict__`.

    Uses `load_recursive` (via `check_and_convert_data`) to load any fillable annotated properties.
    NOTE: keys already present on the freshly constructed instance (e.g. set by __init__) are not loaded from data!
    """
    init_args, init_kwargs = _split_init(init)

    if isinstance(data, bytes) or issubclass(cls, BinaryConfig):
        if not isinstance(data, (bytes, dict)):  # pragma: no cover
            raise NotImplementedError("BinaryConfig can only deal with `bytes` or a dict of bytes as input.")
        if not issubclass(cls, BinaryConfig):  # pragma: no cover
            raise NotImplementedError("Only BinaryConfig can be used with `bytes` (or a dict of bytes) as input.")

        return typing.cast(C, cls._parse_into(data))

    if dc.is_dataclass(cls):
        fields = check_and_convert_data(cls, data, init_kwargs.keys(), strict=strict, convert_types=convert_types)
        if init:
            raise ValueError("Init is not allowed for dataclasses!")

        # the cast ensures mypy sees an instance of the cls type (and not a fictuous `DataclassInstance`)
        return typing.cast(C, cls(**fields))

    if isinstance(data, cls):
        # already the right type! (e.g. Pathlib)
        return typing.cast(C, data)

    instance = cls(*init_args, **init_kwargs)
    fields = check_and_convert_data(cls, data, instance.__dict__.keys(), strict=strict, convert_types=convert_types)
    instance.__dict__.update(**fields)
    return instance

579 

580 

581def _load_into_instance( 

582 inst: C, 

583 cls: typing.Type[C], 

584 data: dict[str, typing.Any], 

585 init: T_init = None, 

586 strict: bool = True, 

587 convert_types: bool = False, 

588) -> C: 

589 """ 

590 Similar to `load_into_recurse` but uses an existing instance of a class (so after __init__) \ 

591 and thus does not support init. 

592 

593 """ 

594 if init is not None: 

595 raise ValueError("Can not init an existing instance!") 

596 

597 existing_data = inst.__dict__ 

598 

599 to_load = check_and_convert_data( 

600 cls, 

601 data, 

602 _except=existing_data.keys(), 

603 strict=strict, 

604 convert_types=convert_types, 

605 ) 

606 

607 inst.__dict__.update(**to_load) 

608 

609 return inst 

610 

611 

def load_into_class(
    cls: typing.Type[C],
    data: T_data,
    /,
    key: str = None,
    init: T_init = None,
    strict: bool = True,
    lower_keys: bool = False,
    convert_types: bool = False,
    use_env: UseEnvSetting = DEFAULT_ENV_SETTING,
) -> C:
    """
    Load `data` (see `load_data`) and use it to build a new instance of `cls` (see `_load_into_recurse`).

    For a BinaryConfig class, the loaded data may be bytes as well as a dict.
    """
    accepted_types: tuple[type, ...] = (dict,)
    if issubclass(cls, BinaryConfig):
        accepted_types = (dict, bytes)

    loaded = load_data(
        data,
        key,
        cls.__name__,
        lower_keys=lower_keys,
        allow_types=accepted_types,
        strict=strict,
        use_env=use_env,
    )

    return _load_into_recurse(cls, loaded, init=init, strict=strict, convert_types=convert_types)

637 

638 

def load_into_instance(
    inst: C,
    data: T_data,
    /,
    key: str = None,
    init: T_init = None,
    strict: bool = True,
    lower_keys: bool = False,
    convert_types: bool = False,
    use_env: UseEnvSetting = DEFAULT_ENV_SETTING,
) -> C:
    """
    Load `data` (see `load_data`) and write it into the existing instance `inst` (see `_load_into_instance`).

    The class of the instance determines the default key and whether bytes are accepted (BinaryConfig only).
    `init` is passed along, but anything other than None is rejected by `_load_into_instance`.
    """
    instance_class = inst.__class__

    accepted_types: tuple[type, ...] = (dict,)
    if issubclass(instance_class, BinaryConfig):
        accepted_types = (dict, bytes)

    loaded = load_data(
        data,
        key,
        instance_class.__name__,
        lower_keys=lower_keys,
        allow_types=accepted_types,
        strict=strict,
        use_env=use_env,
    )

    return _load_into_instance(inst, instance_class, loaded, init=init, strict=strict, convert_types=convert_types)

665 

666 

def load_into(
    cls: typing.Type[C],
    data: T_data = None,
    /,
    key: str = None,
    init: T_init = None,
    strict: bool = True,
    lower_keys: bool = False,
    convert_types: bool = False,
    use_env: UseEnvSetting = DEFAULT_ENV_SETTING,
) -> C:
    """
    Load your config into a class (instance).

    The first argument may be a class or an existing instance. The latter is hard to explain to mypy, so officially
    only classes are supported here; use `load_into_instance` if you want to fill an instance.

    Args:
        cls: either a class or an existing instance of that class.
        data: can be a dictionary or a path to a file to load (as pathlib.Path or str)
        key: optional (nested) dictionary key to load data from (e.g. 'tool.su6.specific')
        init: optional data to pass to your cls' __init__ method (only if cls is not an instance already)
        strict: enable type checks or allow anything?
        lower_keys: should the config keys be lowercased? (for .env)
        convert_types: should the types be converted to the annotated type if not yet matching? (for .env)
        use_env: Controls how ${VAR} placeholders are resolved (defaults to `DEFAULT_ENV_SETTING`).
            Determines which sources are consulted and which one wins on duplicate names (see `apply_env`):

            - "yes": OS environment and .env, the OS environment wins
            - "inverse": .env and OS environment, .env wins
            - "dotenv": .env only
            - "environ": OS environment only
            - "no": no interpolation
    """
    if isinstance(cls, type):
        # regular case: build a new instance of the class
        return load_into_class(
            cls,
            data,
            key=key,
            init=init,
            strict=strict,
            lower_keys=lower_keys,
            convert_types=convert_types,
            use_env=use_env,
        )

    # would not be supported according to mypy, but you can still load_into(instance)
    return load_into_instance(
        cls,
        data,
        key=key,
        init=init,
        strict=strict,
        lower_keys=lower_keys,
        convert_types=convert_types,
        use_env=use_env,
    )