Coverage for src/configuraptor/core.py: 100%
258 statements
« prev ^ index » next coverage.py v7.2.7, created at 2026-05-14 16:24 +0200
« prev ^ index » next coverage.py v7.2.7, created at 2026-05-14 16:24 +0200
1"""
2Contains most of the loading logic.
3"""
5import dataclasses as dc
6import io
7import os
8import types
9import typing
10import warnings
11from pathlib import Path
12from typing import Any, Type
14import requests
15from dotenv import dotenv_values as _dotenv_values
16from dotenv import find_dotenv
18from . import loaders
19from .abs import DEFAULT_ENV_SETTING, AnyType, C, T, T_data, Type_C, UseEnvSetting
20from .alias import Alias, has_alias
21from .binary_config import BinaryConfig
22from .errors import (
23 ConfigErrorCouldNotConvert,
24 ConfigErrorInvalidType,
25 ConfigErrorMissingKey,
26 FailedToLoad,
27)
28from .helpers import (
29 all_annotations,
30 camel_to_snake,
31 check_type,
32 dataclass_field,
33 expand_env_vars_into_toml_values,
34 find_pyproject_toml,
35 is_custom_class,
36 is_optional,
37 is_parameterized,
38 is_union,
39)
40from .postpone import Postponed
41from .type_converters import CONVERTERS
44def _data_for_nested_key(key: str, raw: dict[str, typing.Any]) -> dict[str, typing.Any]:
45 """
46 If a key contains a dot, traverse the raw dict until the right key was found.
48 Example:
49 key = some.nested.key
50 raw = {"some": {"nested": {"key": {"with": "data"}}}}
51 -> {"with": "data"}
52 """
53 parts = key.split(".")
54 while parts:
55 key = parts.pop(0)
56 if key not in raw:
57 return {}
59 raw = raw[key]
61 return raw
def _guess_key(clsname: str) -> str:
    """
    Derive the default key for a class name.

    When `load_into` is not given a key manually, the class' name is run through
    `camel_to_snake` and the result is used as the key.
    """
    guessed = camel_to_snake(clsname)
    return guessed
72def _from_mock_url(url: str) -> str:
73 """
74 Pytest only: when starting a url with mock:// it is expected to just be json afterwards.
75 """
76 return url.removeprefix("mock://")
def guess_filetype_for_url(url: str, response: "requests.Response | None" = None) -> str:
    """
    Guess the filetype (-> loader, e.g. json or yaml) for data that is loaded from a url.

    Two sources are tried, in this order:
      1. the extension of the url, ignoring the query string (`?...`) and fragment (`#...`);
      2. the subtype of the response's content-type header (e.g. `application/toml` -> `toml`),
         unless that subtype is `plain`.

    Both are lowercased. Falls back to JSON if none can be found.

    Args:
        url: the location the data was (or would be) requested from.
        response: optional response for that url; only its `headers` are read.
            It is skipped when it is None or otherwise falsy.

    Returns:
        The guessed filetype without a leading dot.
    """
    # neither the query string nor the fragment belongs to the file name
    path = url.split("?")[0].split("#")[0]
    if url_extension := os.path.splitext(path)[1].lower():
        return url_extension.strip(".")

    if response and (content_type_header := response.headers.get("content-type", "").split(";")[0].strip()):
        # media types are case-insensitive, so normalize just like the extension above
        content_type = content_type_header.split("/")[-1].lower()
        if content_type != "plain":
            return content_type

    # If both methods fail, default to JSON
    return "json"
def from_url(url: str, _dummy: bool = False) -> tuple[io.BytesIO, str]:
    """
    Load data as bytes into a file-like object and return the file type.

    This can be used by __load_data:
    > loader = loaders.get(filetype)
    > # dev/null exists but always returns b''
    > data = loader(contents, Path("/dev/null"))

    Args:
        url: where to load from. A `mock://` url is not requested; everything after the prefix is the data.
        _dummy: if True (and the url is not a mock url), no request is made and the data is `{}`.

    Returns:
        A tuple of (BytesIO holding the encoded text, filetype guessed by `guess_filetype_for_url`).

    Environment:
        SSL_VERIFY: certificate verification is on by default and is only switched off by an explicit
            opt-out value (`0`, `false`, `no` or `off`, case-insensitive).
    """
    if url.startswith("mock://"):
        data = _from_mock_url(url)
        resp = None
    elif _dummy:
        resp = None
        data = "{}"
    else:
        # Only an explicit opt-out may disable TLS verification. Comparing against "1" alone
        # would silently turn verification off for values such as "true" or "yes".
        ssl_verify = os.getenv("SSL_VERIFY", "1").strip().lower() not in ("0", "false", "no", "off")

        resp = requests.get(url, timeout=10, verify=ssl_verify)
        data = resp.text

    filetype = guess_filetype_for_url(url, resp)
    return io.BytesIO(data.encode()), filetype
def dotenv_values() -> dict[str, str | None]:
    """Wrapper around dotenv.dotenv_values that uses the .env located via `find_dotenv(usecwd=True)`."""
    env_file = find_dotenv(usecwd=True)
    return _dotenv_values(dotenv_path=env_file)
def apply_env(data: dict[str, typing.Any], use_env: UseEnvSetting) -> None:
    """
    Apply the desired env-setting logic on data.

    `use_env` decides which variables are handed to `expand_env_vars_into_toml_values`:
      - "yes": .env values overlaid with os.environ (os.environ wins on duplicate names)
      - "inverse": os.environ overlaid with .env values (.env wins on duplicate names)
      - "dotenv": .env values only
      - "environ": a copy of os.environ only
    For any other value nothing happens.

    Nothing is returned, so the helper is expected to update `data` in place.
    """
    if use_env == "yes":
        env = {**dotenv_values(), **os.environ}
    elif use_env == "inverse":
        env = {**os.environ, **dotenv_values()}
    elif use_env == "dotenv":
        env = dotenv_values()
    elif use_env == "environ":
        env = dict(os.environ)
    else:  # pragma: no cover
        return

    expand_env_vars_into_toml_values(data, env)
def _load_data(
    data: T_data,
    key: str = None,
    classname: str = None,
    lower_keys: bool = False,
    allow_types: tuple[type, ...] = (dict,),
    strict: bool = False,
    use_env: UseEnvSetting = DEFAULT_ENV_SETTING,
) -> dict[str, typing.Any]:
    """
    Tries to load the right data from a filename/path or dict, based on a manual key or a classname.

    E.g. class Tool will be mapped to key tool.
    It also deals with nested keys (tool.extra -> {"tool": {"extra": ...}}

    Accepted inputs:
      - bytes: returned untouched;
      - list: every item is loaded via `load_data` and the results are merged (later items win);
      - str: a http(s):// or mock:// url is fetched with `from_url`, anything else is treated as a path;
      - Path: opened in binary mode and parsed by the loader matching its suffix (or name);
      - anything else is used as-is.

    Raises:
        ValueError: for an empty list, when no data is left after selecting `key`,
            or when the result is not an instance of `allow_types`.
    """
    if isinstance(data, bytes):
        # bytes in -> bytes out, unmodified.
        # `T_data` is re-used, which makes this hard to type for mypy.
        return data  # type: ignore

    if isinstance(data, list):
        if not data:
            raise ValueError("Empty list passed!")

        merged: dict[str, typing.Any] = {}
        for source in data:
            # NOTE(review): lower_keys is forced to True here regardless of the argument;
            # kept as-is - confirm this is intentional.
            merged.update(
                load_data(
                    source,
                    key=key,
                    classname=classname,
                    lower_keys=True,
                    allow_types=allow_types,
                    strict=strict,
                    use_env=use_env,
                )
            )

        return merged

    if isinstance(data, str):
        if not data.startswith(("http://", "https://", "mock://")):
            data = Path(data)
        else:
            stream, filetype = from_url(data)

            parse = loaders.get(filetype)
            # dev/null exists but always returns b''
            data = parse(stream, Path("/dev/null"))

    if isinstance(data, Path):
        with data.open("rb") as handle:
            parse = loaders.get(data.suffix or data.name)
            data = parse(handle, data.resolve())

    if not data:
        return {}

    if key is None:
        # no key given: use the only key there is, or else derive one from the class name
        if len(data) == 1:
            key = next(iter(data.keys()))
        elif classname is not None:
            key = _guess_key(classname)

    if key:
        data = _data_for_nested_key(key, data)

    if not data:
        raise ValueError("No data found!")

    if not isinstance(data, allow_types):
        raise ValueError(f"Data should be one of {allow_types} but it is {type(data)}!")

    data_is_dict = isinstance(data, dict)
    if lower_keys and data_is_dict:
        data = {k.lower(): v for k, v in data.items()}

    if use_env != "no" and data_is_dict:
        apply_env(data, use_env)

    return typing.cast(dict[str, typing.Any], data)
def load_data(
    data: T_data,
    key: str = None,
    classname: str = None,
    lower_keys: bool = False,
    allow_types: tuple[type, ...] = (dict,),
    strict: bool = False,
    use_env: UseEnvSetting = DEFAULT_ENV_SETTING,
) -> dict[str, typing.Any]:
    """
    Wrapper around __load_data that retries with key="" if anything goes wrong.

    When `data` is None, the result of `find_pyproject_toml()` is loaded instead.
    If the retry with key="" fails as well, `FailedToLoad` is raised in strict mode;
    otherwise a UserWarning is emitted and an empty dict is returned.
    """
    if data is None:
        # nothing passed: fall back to pyproject.toml
        data = find_pyproject_toml()

    try:
        return _load_data(
            data,
            key,
            classname,
            lower_keys=lower_keys,
            allow_types=allow_types,
            strict=strict,
            use_env=use_env,
        )
    except Exception as e:
        # sourcery skip: simplify-empty-collection-comparison
        # @sourcery: `key != ""` is NOT the same as `not key`
        if key != "":
            # second attempt, this time with key ""
            return load_data(
                data,
                "",
                classname,
                lower_keys=lower_keys,
                allow_types=allow_types,
                strict=strict,
                use_env=use_env,
            )

        if strict:
            raise FailedToLoad(data) from e

        # e.g. if settings are to be loaded via a URL that is unavailable or returns invalid json
        warnings.warn(f"Data ('{data!r}') could not be loaded", source=e, category=UserWarning)
        return {}
F = typing.TypeVar("F")


def convert_between(from_value: F, from_type: Type[F], to_type: Type[T]) -> T:
    """
    Convert a value between types.

    A converter stored in CONVERTERS for the (from_type, to_type) pair is preferred;
    if there is none, `to_type` itself is called with the value.
    """
    converter = CONVERTERS.get((from_type, to_type))
    if not converter:
        # no dedicated converter: let the target type build itself from the value
        return to_type(from_value)  # type: ignore

    return typing.cast(T, converter(from_value))
def check_and_convert_type(value: Any, _type: Type[T], convert_types: bool, key: str = "variable") -> T:
    """
    Return `value` if it matches `_type`; otherwise convert it (when allowed) or raise.

    Args:
        value (Any): The value to be checked and potentially converted.
        _type (Type[T]): The expected type for the value.
        convert_types (bool): If True, allows type conversion if the types do not match.
        key (str, optional): The name or key associated with the variable (used in error messages).
            Defaults to "variable".

    Returns:
        T: The value, potentially converted to the expected type.
            An unresolved Alias for an optional type results in None.

    Raises:
        ConfigErrorInvalidType: If the type does not match, and type conversion is not allowed
            (or the value is an unresolved Alias for a non-optional type).
        ConfigErrorCouldNotConvert: If type conversion fails.
    """
    if check_type(value, _type):
        # already the right type, nothing to do
        return value

    if isinstance(value, Alias):
        if not is_optional(_type):
            # unresolved alias, error should've already been thrown for parent but lets do it again:
            raise ConfigErrorInvalidType(value.to, value=value, expected_type=_type)

        return typing.cast(T, None)

    if not convert_types:
        # mismatch, and converting is not allowed
        raise ConfigErrorInvalidType(key, value=value, expected_type=_type)

    # mismatch, but converting is allowed
    actual_type = type(value)
    try:
        return convert_between(value, actual_type, _type)
    except (TypeError, ValueError) as e:
        raise ConfigErrorCouldNotConvert(actual_type, _type, value) from e
def ensure_types(
    data: dict[str, T],
    annotations: dict[str, type[T]],
    convert_types: bool = False,
) -> dict[str, T | None]:
    """
    Make sure all values in 'data' are in line with the ones stored in 'annotations'.

    Every annotated key is run through `check_and_convert_type`. Keys are left out of the result when:
      - the key is missing from `data` (not expected, a warning is emitted);
      - the value is Postponed;
      - the value is an Alias whose target is Postponed.
    An Alias whose target exists in `data` is replaced by that target before checking.

    TODO: python 3.11 exception groups to throw multiple errors at once!
    """
    # sentinel instead of None, since typing.Optional values can legitimately be None!
    # cast to T to make mypy happy
    notfound = typing.cast(T, object())

    checked: dict[str, T | None] = {}
    for key, _type in annotations.items():
        current = data.get(key, notfound)
        if current is notfound:  # pragma: nocover
            warnings.warn("This should not happen since `load_recursive` already fills `data` based on `annotations`")
            # skip!
            continue

        if isinstance(current, Postponed):
            # postponed items are left alone
            continue

        if isinstance(current, Alias):
            target = data.get(current.to, notfound)
            if isinstance(target, Postponed):
                # an alias of a postponed item is postponed too
                continue

            if target is not notfound:
                # the original key is set, so the alias takes over its value
                current = target

        checked[key] = check_and_convert_type(current, _type, convert_types, key)

    return checked
def convert_key(key: str) -> str:
    """
    Turn every '-' and '.' in a key into '_' so it can be mapped to the Config properties.
    """
    separators_to_underscore = str.maketrans("-.", "__")
    return key.translate(separators_to_underscore)
def convert_config(items: dict[str, T]) -> dict[str, T]:
    """
    Converts the config dict (from toml) or 'overwrites' dict in two ways.

    1. items whose value is None are dropped, since in that case the default should be used;
    2. keys are passed through `convert_key` ('-' and '.' become '_') so they can be mapped
       to the Config properties.
    """
    converted: dict[str, T] = {}
    for raw_key, value in items.items():
        if value is None:
            continue

        converted[convert_key(raw_key)] = value

    return converted
def load_recursive(
    cls: AnyType,
    data: dict[str, T],
    annotations: dict[str, AnyType],
    convert_types: bool = False,
) -> dict[str, T]:
    """
    For all annotations (recursively gathered from parents with `all_annotations`), \
    try to resolve the tree of annotations.

    Uses `load_into_recurse`, not itself directly.

    Per annotated key:
      - present in `data`: list[Custom], dict[..., Custom], unions containing a custom class and
        plain custom classes are loaded with `_load_into_recurse`; other values are kept as they are.
      - absent from `data`: the first that applies of: a (truthy) alias value, the class-level default,
        None for optional types, the dataclass default_factory, `Defaultable.default()`.

    Example:
        class First:
            key: str

        class Second:
            other: First

        # step 1
        cls = Second
        data = {"second": {"other": {"key": "anything"}}}
        annotations: {"other": First}

        # step 1.5
        data = {"other": {"key": "anything"}
        annotations: {"other": First}

        # step 2
        cls = First
        data = {"key": "anything"}
        annotations: {"key": str}

    Raises:
        ConfigErrorMissingKey: if a key is absent from `data` and no fallback applies.

    TODO: python 3.11 exception groups to throw multiple errors at once!
    """
    resolved = {}

    for name, annotation in annotations.items():
        value: typing.Any  # the value can change type along the way, so Any instead of T

        if name not in data:
            if value := has_alias(cls, name, data):
                # value updated by alias
                pass
            elif name in cls.__dict__:
                # property has default, use that instead.
                value = cls.__dict__[name]
            elif is_optional(annotation):
                # type is optional and not found in __dict__ -> default is None
                value = None
            elif (
                dc.is_dataclass(cls)
                and (field := dataclass_field(cls, name))
                and field.default_factory is not dc.MISSING
            ):
                # could have a default factory
                # todo: do something with field.default?
                value = field.default_factory()
            elif is_custom_class(annotation) and isinstance(annotation, type) and issubclass(annotation, Defaultable):
                value = annotation.default()
            else:
                raise ConfigErrorMissingKey(name, cls, annotation)

            resolved[name] = value
            continue

        value = data[name]
        if is_parameterized(annotation):
            origin = typing.get_origin(annotation)
            type_args = typing.get_args(annotation)
            if origin is list and type_args and is_custom_class(type_args[0]):
                # e.g. list[Point]
                item_type = type_args[0]
                value = [_load_into_recurse(item_type, item, convert_types=convert_types) for item in value]
            elif origin is dict and type_args and is_custom_class(type_args[1]):
                # e.g. dict[str, Point]
                # the key type is not a custom class, so keys are left untouched
                _, item_type = type_args
                value = {
                    item_key: _load_into_recurse(item_type, item, convert_types=convert_types)
                    for item_key, item in value.items()
                }
            # (any other dict keeps its data the same)
            elif is_union(annotation) and type_args:
                if convert_types and types.NoneType in type_args and not value:
                    # falsy value for an optional type becomes None when converting
                    value = None
                else:
                    for candidate in type_args:
                        if is_custom_class(candidate) and (isinstance(value, dict) or isinstance(value, candidate)):
                            value = _load_into_recurse(candidate, value, convert_types=convert_types)
        elif is_custom_class(annotation):
            # type must be C (custom class) at this point; includes dataclass but not optional[cls]
            value = _load_into_recurse(
                # the cast only tells mypy and pycharm that the annotation is of type C,
                # the annotation itself is what gets passed
                typing.cast(Type_C[typing.Any], annotation),
                value,
                convert_types=convert_types,
            )
        # else: normal value, don't change

        resolved[name] = value

    return resolved
def check_and_convert_data(
    cls: typing.Type[C],
    data: dict[str, typing.Any],
    _except: typing.Iterable[str],
    strict: bool = True,
    convert_types: bool = False,
) -> dict[str, typing.Any]:
    """
    Based on class annotations, this prepares the data for `load_into_recurse`.

    1. convert config-keys to python compatible config_keys (`convert_config`)
    2. loads custom class type annotations with the same logic (see also `load_recursive`)
    3. only when `strict`: ensures the annotated types match the actual types (`ensure_types`).

    Annotations named in `_except` are passed to `all_annotations` to be left out.
    """
    annotations = all_annotations(cls, _except=_except)
    prepared = load_recursive(cls, convert_config(data), annotations, convert_types=convert_types)

    if not strict:
        return prepared

    return ensure_types(prepared, annotations, convert_types=convert_types)
522T_init_list = list[typing.Any]
523T_init_dict = dict[str, typing.Any]
524T_init = tuple[T_init_list, T_init_dict] | T_init_list | T_init_dict | None
527@typing.no_type_check # (mypy doesn't understand 'match' fully yet)
528def _split_init(init: T_init) -> tuple[T_init_list, T_init_dict]:
529 """
530 Accept a tuple, a dict or a list of (arg, kwarg), {kwargs: ...}, [args] respectively and turn them all into a tuple.
531 """
532 if not init:
533 return [], {}
535 args: T_init_list = []
536 kwargs: T_init_dict = {}
537 match init:
538 case (args, kwargs):
539 return args, kwargs
540 case [*args]:
541 return args, {}
542 case {**kwargs}:
543 return [], kwargs
544 case _:
545 raise ValueError("Init must be either a tuple of list and dict, a list or a dict.")
def _load_into_recurse(
    cls: typing.Type[C],
    data: dict[str, typing.Any] | bytes,
    init: T_init = None,
    strict: bool = True,
    convert_types: bool = False,
) -> C:
    """
    Loads an instance of `cls` filled with `data`.

    Uses `load_recursive` to load any fillable annotated properties (see that method for an example).
    `init` can be used to optionally pass extra __init__ arguments. \
        NOTE: This will overwrite a config key with the same name!

    Four cases are handled, in this order:
      1. bytes input or a BinaryConfig class: delegated to `cls._parse_into`;
      2. dataclasses: constructed from the checked data (`init` is not allowed);
      3. `data` already is an instance of `cls`: returned as-is;
      4. any other class: constructed with `init`, after which the checked data is put in its `__dict__`.
    """
    init_args, init_kwargs = _split_init(init)

    if isinstance(data, bytes) or issubclass(cls, BinaryConfig):
        if not isinstance(data, (bytes, dict)):  # pragma: no cover
            raise NotImplementedError("BinaryConfig can only deal with `bytes` or a dict of bytes as input.")
        if not issubclass(cls, BinaryConfig):  # pragma: no cover
            raise NotImplementedError("Only BinaryConfig can be used with `bytes` (or a dict of bytes) as input.")

        return typing.cast(C, cls._parse_into(data))

    if dc.is_dataclass(cls):
        to_load = check_and_convert_data(cls, data, init_kwargs.keys(), strict=strict, convert_types=convert_types)
        if init:
            raise ValueError("Init is not allowed for dataclasses!")

        # the cast tells mypy this is an instance of the cls type (and not a fictitious `DataclassInstance`)
        return typing.cast(C, cls(**to_load))

    if isinstance(data, cls):
        # already the right type! (e.g. Pathlib)
        return typing.cast(C, data)

    inst = cls(*init_args, **init_kwargs)
    # whatever __init__ already set is excluded from loading
    to_load = check_and_convert_data(cls, data, inst.__dict__.keys(), strict=strict, convert_types=convert_types)
    inst.__dict__.update(**to_load)
    return inst
def _load_into_instance(
    inst: C,
    cls: typing.Type[C],
    data: dict[str, typing.Any],
    init: T_init = None,
    strict: bool = True,
    convert_types: bool = False,
) -> C:
    """
    Similar to `load_into_recurse` but uses an existing instance of a class (so after __init__) \
        and thus does not support init.

    Attributes that already exist in the instance's `__dict__` are excluded from loading;
    the remaining checked data is written into that `__dict__`.

    Raises:
        ValueError: if `init` is anything other than None.
    """
    if init is not None:
        raise ValueError("Can not init an existing instance!")

    attributes = inst.__dict__
    loaded = check_and_convert_data(
        cls,
        data,
        _except=attributes.keys(),
        strict=strict,
        convert_types=convert_types,
    )
    attributes.update(**loaded)

    return inst
def load_into_class(
    cls: typing.Type[C],
    data: T_data,
    /,
    key: str = None,
    init: T_init = None,
    strict: bool = True,
    lower_keys: bool = False,
    convert_types: bool = False,
    use_env: UseEnvSetting = DEFAULT_ENV_SETTING,
) -> C:
    """
    Shortcut for _load_data + load_into_recurse.

    The class name is passed along so a key can be guessed from it;
    bytes are only allowed as loaded data for BinaryConfig subclasses.
    """
    allow_types: tuple[type, ...]
    if issubclass(cls, BinaryConfig):
        allow_types = (dict, bytes)
    else:
        allow_types = (dict,)

    loaded = load_data(
        data,
        key,
        cls.__name__,
        lower_keys=lower_keys,
        allow_types=allow_types,
        strict=strict,
        use_env=use_env,
    )
    return _load_into_recurse(cls, loaded, init=init, strict=strict, convert_types=convert_types)
def load_into_instance(
    inst: C,
    data: T_data,
    /,
    key: str = None,
    init: T_init = None,
    strict: bool = True,
    lower_keys: bool = False,
    convert_types: bool = False,
    use_env: UseEnvSetting = DEFAULT_ENV_SETTING,
) -> C:
    """
    Shortcut for _load_data + load_into_existing.

    The class of `inst` supplies the name a key can be guessed from;
    bytes are only allowed as loaded data when that class is a BinaryConfig subclass.
    """
    cls = inst.__class__

    allow_types: tuple[type, ...]
    if issubclass(cls, BinaryConfig):
        allow_types = (dict, bytes)
    else:
        allow_types = (dict,)

    loaded = load_data(
        data,
        key,
        cls.__name__,
        lower_keys=lower_keys,
        allow_types=allow_types,
        strict=strict,
        use_env=use_env,
    )
    return _load_into_instance(inst, cls, loaded, init=init, strict=strict, convert_types=convert_types)
def load_into(
    cls: typing.Type[C],
    data: T_data = None,
    /,
    key: str = None,
    init: T_init = None,
    strict: bool = True,
    lower_keys: bool = False,
    convert_types: bool = False,
    use_env: UseEnvSetting = DEFAULT_ENV_SETTING,
) -> C:
    """
    Load your config into a class (instance).

    Supports both a class or an instance as first argument, but that's hard to explain to mypy, so officially only
    classes are supported, and if you want to `load_into` an instance, you should use `load_into_instance`.

    After loading, a callable `__post_init__` on the result is invoked, unless the result is a dataclass.

    Args:
        cls: either a class or an existing instance of that class.
        data: can be a dictionary or a path to a file to load (as pathlib.Path or str)
        key: optional (nested) dictionary key to load data from (e.g. 'tool.su6.specific')
        init: optional data to pass to your cls' __init__ method (only if cls is not an instance already)
        strict: enable type checks or allow anything?
        lower_keys: should the config keys be lowercased? (for .env)
        convert_types: should the types be converted to the annotated type if not yet matching? (for .env)
        use_env: Controls how ${VAR} placeholders are resolved (defaults to DEFAULT_ENV_SETTING).
            Determines which sources are consulted and in what order:

            - "yes": OS environment → .env
            - "inverse": .env → OS environment
            - "dotenv": .env only
            - "environ": OS environment only
            - "no": no interpolation
    """
    result: C

    if isinstance(cls, type):
        # a real class: build a new instance of it
        result = load_into_class(
            cls,
            data,
            key=key,
            init=init,
            strict=strict,
            lower_keys=lower_keys,
            convert_types=convert_types,
            use_env=use_env,
        )
    else:
        # not supported according to mypy, but load_into(instance) still works
        result = load_into_instance(
            cls,
            data,
            key=key,
            init=init,
            strict=strict,
            lower_keys=lower_keys,
            convert_types=convert_types,
            use_env=use_env,
        )

    post_init = getattr(result, "__post_init__", None)
    if not callable(post_init) or dc.is_dataclass(result):
        return result

    post_init()
    return result
class Defaultable:
    """
    Explicit opt-in for classes that can construct a default instance.
    """

    @classmethod
    def default(cls) -> typing.Self:
        """
        Build the default instance of `cls` by loading an empty dict into it with `load_into`.
        """
        instance = load_into(cls, {})
        return instance