Coverage for src/configuraptor/core.py: 100%
262 statements
« prev ^ index » next coverage.py v7.2.7, created at 2026-05-14 16:50 +0200
« prev ^ index » next coverage.py v7.2.7, created at 2026-05-14 16:50 +0200
1"""
2Contains most of the loading logic.
3"""
5import dataclasses as dc
6import io
7import os
8import types
9import typing
10import warnings
11from pathlib import Path
12from typing import Any, Type
14import requests
15from dotenv import dotenv_values as _dotenv_values
16from dotenv import find_dotenv
18from . import loaders
19from .abs import DEFAULT_ENV_SETTING, AnyType, C, T, T_data, Type_C, UseEnvSetting
20from .alias import Alias, has_alias
21from .binary_config import BinaryConfig
22from .errors import (
23 ConfigErrorCouldNotConvert,
24 ConfigErrorInvalidType,
25 ConfigErrorMissingKey,
26 FailedToLoad,
27)
28from .helpers import (
29 all_annotations,
30 camel_to_snake,
31 check_type,
32 dataclass_field,
33 expand_env_vars_into_toml_values,
34 find_pyproject_toml,
35 is_custom_class,
36 is_optional,
37 is_parameterized,
38 is_union,
39)
40from .postpone import Postponed
41from .type_converters import CONVERTERS
44def _data_for_nested_key(key: str, raw: dict[str, typing.Any]) -> dict[str, typing.Any]:
45 """
46 If a key contains a dot, traverse the raw dict until the right key was found.
48 Example:
49 key = some.nested.key
50 raw = {"some": {"nested": {"key": {"with": "data"}}}}
51 -> {"with": "data"}
52 """
53 parts = key.split(".")
54 while parts:
55 key = parts.pop(0)
56 if key not in raw:
57 return {}
59 raw = raw[key]
61 return raw
def _guess_key(clsname: str) -> str:
    """
    Derive the default lookup key for a class from its name.

    When no key is manually defined for `load_into`, the class name is
    converted to snake_case (via `camel_to_snake`) and used as the key.
    """
    snake_cased = camel_to_snake(clsname)
    return snake_cased
72def _from_mock_url(url: str) -> str:
73 """
74 Pytest only: when starting a url with mock:// it is expected to just be json afterwards.
75 """
76 return url.removeprefix("mock://")
def guess_filetype_for_url(url: str, response: "requests.Response | None" = None) -> str:
    """
    Based on the url (which may have an extension) and the requests response \
    (which may have a content-type), try to guess the right filetype (-> loader, e.g. json or yaml).

    Args:
        url: the location the data was (or will be) loaded from.
        response: optional response object; only its `headers` mapping is read.

    Returns:
        The lowercased extension of the url's path if it has one, otherwise the subtype of the
        response's content-type (unless that is "plain"), otherwise "json".
    """
    # query string and fragment are never part of the file name
    url = url.split("?")[0].split("#")[0]

    # Only the path may carry a file extension. Looking at the full url would turn the TLD of a
    # bare host ("https://example.com") into the extension "com".
    _, scheme_separator, remainder = url.partition("://")
    path = remainder.partition("/")[2] if scheme_separator else url

    if url_extension := os.path.splitext(path)[1].lower():
        return url_extension.strip(".")

    if response and (content_type_header := response.headers.get("content-type", "").split(";")[0].strip().lower()):
        # "application/json" -> "json"
        content_type = content_type_header.split("/")[-1]
        if content_type != "plain":
            return content_type

    # If both methods fail, default to JSON
    return "json"
def from_url(url: str, _dummy: bool = False) -> tuple[io.BytesIO, str]:
    """
    Fetch the contents behind `url` as a bytes buffer, together with the guessed file type.

    This can be used by __load_data:
    > loader = loaders.get(filetype)
    > # dev/null exists but always returns b''
    > data = loader(contents, Path("/dev/null"))

    - `mock://` urls are not requested; the text after the prefix is the data.
    - with `_dummy` enabled nothing is requested either and the data is "{}".
    - otherwise the url is requested with a 10 second timeout; certificates are verified
      unless the `SSL_VERIFY` environment variable is set to something other than "1".
    """
    # only a real request produces a response object
    resp = None

    if url.startswith("mock://"):
        data = _from_mock_url(url)
    elif _dummy:
        data = "{}"
    else:
        ssl_verify = os.getenv("SSL_VERIFY", "1") == "1"

        resp = requests.get(url, timeout=10, verify=ssl_verify)
        data = resp.text

    filetype = guess_filetype_for_url(url, resp)
    contents = io.BytesIO(data.encode())
    return contents, filetype
def dotenv_values() -> dict[str, str | None]:
    """Wrapper around dotenv.dotenv_values; the .env file is located with `find_dotenv(usecwd=True)`."""
    env_file = find_dotenv(usecwd=True)
    return _dotenv_values(dotenv_path=env_file)
def apply_env(data: dict[str, typing.Any], use_env: UseEnvSetting) -> None:
    """
    Build the environment mapping selected by `use_env` and expand it into `data`.

    `data` is handed to `expand_env_vars_into_toml_values` together with that mapping; nothing is returned.
    Any setting other than the ones handled below results in no action.
    """
    if use_env == "yes":
        # right-hand side wins: os.environ overrides .env
        env = dotenv_values() | os.environ
    elif use_env == "inverse":
        # right-hand side wins: .env overrides os.environ
        env = os.environ | dotenv_values()
    elif use_env == "dotenv":
        env = dotenv_values()
    elif use_env == "environ":
        env = {**os.environ}
    else:  # pragma: no cover
        return

    expand_env_vars_into_toml_values(data, env)
def _load_data(
    data: T_data,
    key: str = None,
    classname: str = None,
    lower_keys: bool = False,
    allow_types: tuple[type, ...] = (dict,),
    strict: bool = False,
    use_env: UseEnvSetting = DEFAULT_ENV_SETTING,
) -> dict[str, typing.Any]:
    """
    Turn `data` (bytes, a list of sources, a url, a filename/path or an already loaded object) into the
    data belonging to one config section.

    The section is selected with `key` (may be nested: tool.extra -> {"tool": {"extra": ...}}).
    Without a key, the only top-level key is used if there is exactly one; otherwise the key is guessed
    from `classname` (e.g. class Tool will be mapped to key tool).

    Raises:
        ValueError: for an empty list, when nothing is found at `key`, or when the result is not one of
            `allow_types`.
    """
    if isinstance(data, bytes):
        # bytes in -> the very same bytes out, untouched.
        # `T_data` is re-used for input and output, which mypy can't follow here.
        return data  # type: ignore

    if isinstance(data, list):
        if not data:
            raise ValueError("Empty list passed!")

        # load every source on its own and merge them; later sources overwrite earlier ones
        merged: dict[str, typing.Any] = {}
        for source in data:
            merged |= load_data(
                source,
                key=key,
                classname=classname,
                # NOTE(review): keys are always lowercased here, regardless of `lower_keys` - confirm this is intended
                lower_keys=True,
                allow_types=allow_types,
                strict=strict,
                use_env=use_env,
            )

        return merged

    if isinstance(data, str):
        is_url = data.startswith(("http://", "https://", "mock://"))
        if is_url:
            contents, filetype = from_url(data)

            loader = loaders.get(filetype)
            # dev/null exists but always returns b''
            data = loader(contents, Path("/dev/null"))
        else:
            # anything else is treated as a filename
            data = Path(data)

    if isinstance(data, Path):
        with data.open("rb") as f:
            # files without a suffix are looked up by their name
            loader = loaders.get(data.suffix or data.name)
            data = loader(f, data.resolve())

    if not data:
        return {}

    if key is None:
        # no explicit key: take the only available one, or fall back to the class name
        if len(data) == 1:
            key = next(iter(data.keys()))
        elif classname is not None:
            key = _guess_key(classname)

    if key:
        data = _data_for_nested_key(key, data)

    if not data:
        raise ValueError("No data found!")

    if not isinstance(data, allow_types):
        raise ValueError(f"Data should be one of {allow_types} but it is {type(data)}!")

    data_is_dict = isinstance(data, dict)

    if lower_keys and data_is_dict:
        data = {k.lower(): v for k, v in data.items()}

    if use_env != "no" and data_is_dict:
        apply_env(data, use_env)

    return typing.cast(dict[str, typing.Any], data)
def load_data(
    data: T_data,
    key: str = None,
    classname: str = None,
    lower_keys: bool = False,
    allow_types: tuple[type, ...] = (dict,),
    strict: bool = False,
    use_env: UseEnvSetting = DEFAULT_ENV_SETTING,
) -> dict[str, typing.Any]:
    """
    Fault-tolerant front for `_load_data`.

    - Without `data`, the result of `find_pyproject_toml()` is used as source.
    - If loading raises, one more attempt is made with key="".
    - If that fails as well, `FailedToLoad` is raised in strict mode; otherwise a warning is
      emitted and an empty dict is returned.
    """
    if data is None:
        # no source given, fall back to pyproject.toml
        data = find_pyproject_toml()

    try:
        return _load_data(
            data,
            key,
            classname,
            lower_keys=lower_keys,
            allow_types=allow_types,
            strict=strict,
            use_env=use_env,
        )
    except Exception as e:
        # sourcery skip: simplify-empty-collection-comparison
        # `key != ""` is deliberately NOT `not key`: a key of None must trigger the retry too.
        if key != "":
            return load_data(
                data,
                "",
                classname,
                lower_keys=lower_keys,
                allow_types=allow_types,
                strict=strict,
                use_env=use_env,
            )

        if strict:
            raise FailedToLoad(data) from e

        # e.g. if settings are to be loaded via a URL that is unavailable or returns invalid json
        warnings.warn(f"Data ('{data!r}') could not be loaded", source=e, category=UserWarning)
        return {}
F = typing.TypeVar("F")


def convert_between(from_value: F, from_type: Type[F], to_type: Type[T]) -> T:
    """
    Convert `from_value` (of type `from_type`) to `to_type`.

    A converter registered in `CONVERTERS` for the (from_type, to_type) pair takes precedence;
    without one, `to_type` itself is called with the value.
    """
    converter = CONVERTERS.get((from_type, to_type))
    if not converter:
        # no dedicated converter: let the target type do the work
        return to_type(from_value)  # type: ignore

    return typing.cast(T, converter(from_value))
def check_and_convert_type(value: Any, _type: Type[T], convert_types: bool, key: str = "variable") -> T:
    """
    Return `value` if it satisfies `_type`, otherwise try to make it fit.

    Args:
        value (Any): The value to be checked and potentially converted.
        _type (Type[T]): The expected type for the value.
        convert_types (bool): If True, a mismatching value is converted with `convert_between`.
        key (str, optional): The name or key associated with the variable (used in error messages).
            Defaults to "variable".

    Returns:
        T: The value itself, None for an unresolved alias on an optional type, or the converted value.

    Raises:
        ConfigErrorInvalidType: If the type does not match and conversion is not allowed,
            or if the value is an unresolved alias for a non-optional type.
        ConfigErrorCouldNotConvert: If the conversion raises a TypeError or ValueError.
    """
    if check_type(value, _type):
        # nothing to do
        return value

    if isinstance(value, Alias):
        if not is_optional(_type):
            # unresolved alias, error should've already been thrown for parent but lets do it again:
            raise ConfigErrorInvalidType(value.to, value=value, expected_type=_type)

        return typing.cast(T, None)

    if not convert_types:
        # mismatch, and we're not allowed to fix it
        raise ConfigErrorInvalidType(key, value=value, expected_type=_type)

    # mismatch, but conversion is allowed
    actual_type = type(value)
    try:
        return convert_between(value, actual_type, _type)
    except (TypeError, ValueError) as e:
        raise ConfigErrorCouldNotConvert(actual_type, _type, value) from e
def ensure_types(
    data: dict[str, T],
    annotations: dict[str, type[T]],
    convert_types: bool = False,
) -> dict[str, T | None]:
    """
    Check (and optionally convert) every annotated value in 'data' against its type in 'annotations'.

    Only annotated keys end up in the result. These entries are left out of the result entirely:
    - keys missing from data (a warning is emitted);
    - `Postponed` values;
    - aliases that point to a `Postponed` value.

    An alias whose target key is present in data is checked using the target's value.

    TODO: python 3.11 exception groups to throw multiple errors at once!
    """
    # unique marker for 'missing', because None is a legitimate value for typing.Optional
    # cast to T to make mypy happy
    missing = typing.cast(T, object())

    checked: dict[str, T | None] = {}
    for name, expected in annotations.items():
        current = data.get(name, missing)

        if current is missing:  # pragma: nocover
            warnings.warn("This should not happen since `load_recursive` already fills `data` based on `annotations`")
            continue

        if isinstance(current, Postponed):
            # postponed items are left alone
            continue

        if isinstance(current, Alias):
            target = data.get(current.to, missing)
            if target is not missing:
                if isinstance(target, Postponed):
                    # an alias to a postponed item is postponed as well
                    continue

                # the aliased key is set, so use its value
                current = target

        checked[name] = check_and_convert_type(current, expected, convert_types, name)

    return checked
def convert_key(key: str) -> str:
    """
    Make a config key usable as a Config property name: every '-' and '.' becomes '_'.
    """
    separators_to_underscore = str.maketrans("-.", "__")
    return key.translate(separators_to_underscore)
def convert_config(items: dict[str, T]) -> dict[str, T]:
    """
    Clean up the config dict (from toml) or 'overwrites' dict.

    - entries with a value of None are dropped, so the default is used for them;
    - keys are passed through `convert_key` ('-' and '.' become '_') so they map to the Config properties.
    """
    converted: dict[str, T] = {}
    for name, value in items.items():
        if value is None:
            continue

        converted[convert_key(name)] = value

    return converted
def load_recursive(
    cls: AnyType,
    data: dict[str, T],
    annotations: dict[str, AnyType],
    convert_types: bool = False,
) -> dict[str, T]:
    """
    Resolve a value for every annotation of `cls` (recursively gathered from parents with `all_annotations`).

    Values present in `data` are loaded into their annotated custom classes where applicable
    (directly, inside list[...] / dict[..., ...] or as member of a union). Recursion happens via
    `_load_into_recurse`, not by calling this function directly.

    For keys missing from `data`, the first applicable fallback is used: an alias, a class-level default,
    a `Defaultable` default instance, None for optional types, a dataclass default factory.

    Example:
        class First:
            key: str

        class Second:
            other: First

        # step 1
        cls = Second
        data = {"second": {"other": {"key": "anything"}}}
        annotations: {"other": First}

        # step 1.5
        data = {"other": {"key": "anything"}
        annotations: {"other": First}

        # step 2
        cls = First
        data = {"key": "anything"}
        annotations: {"key": str}

    Raises:
        ConfigErrorMissingKey: if a key is missing from `data` and none of the fallbacks apply.

    TODO: python 3.11 exception groups to throw multiple errors at once!
    """
    resolved = {}

    for name, annotation in annotations.items():
        # fixme:
        #   if defaultable or optional[defaultable] and key is not in data: return Default()
        #   if defaultable or optional[defaultable] and key is in data but falsey: return None

        if name in data:
            value: typing.Any = data[name]  # value can change so define it as any instead of T
            if is_parameterized(annotation):
                origin = typing.get_origin(annotation)
                type_args = typing.get_args(annotation)
                if origin is list and type_args and is_custom_class(type_args[0]):
                    # e.g. list[Point]
                    item_cls = type_args[0]
                    value = [_load_into_recurse(item_cls, item, convert_types=convert_types) for item in value]

                elif origin is dict and type_args and is_custom_class(type_args[1]):
                    # e.g. dict[str, Point]
                    _key_type, value_cls = type_args
                    # the key (type) is not a custom class, so it is kept as-is:
                    value = {
                        item_key: _load_into_recurse(value_cls, item, convert_types=convert_types)
                        for item_key, item in value.items()
                    }
                # any other dict: keep data the same
                elif is_union(annotation) and type_args:
                    if convert_types and types.NoneType in type_args and not value:
                        # falsey value for an optional type becomes None when converting
                        value = None
                        resolved[name] = value
                        continue

                    for member in type_args:
                        if is_custom_class(member) and isinstance(value, (dict, member)):
                            value = _load_into_recurse(member, value, convert_types=convert_types)

            elif is_custom_class(annotation):
                # type must be C (custom class) at this point; includes dataclass but not optional[cls]
                value = _load_into_recurse(
                    # the cast only exists to tell mypy and pycharm that the annotation is of type C
                    typing.cast(Type_C[typing.Any], annotation),
                    value,
                    convert_types=convert_types,
                )

            # else: normal value, don't change

        elif value := has_alias(cls, name, data):
            # value updated by alias
            ...
        elif name in cls.__dict__:
            # the class defines a default for this property
            value = cls.__dict__[name]
        elif (defaultable := is_defaultable(annotation, with_optional=True)) is not None:
            value = defaultable.default()
        elif is_optional(annotation):
            # optional without a class-level default -> None
            value = None
        elif dc.is_dataclass(cls) and (field := dataclass_field(cls, name)) and field.default_factory is not dc.MISSING:
            # could have a default factory
            # todo: do something with field.default?
            value = field.default_factory()
        else:
            raise ConfigErrorMissingKey(name, cls, annotation)

        resolved[name] = value

    return resolved
def check_and_convert_data(
    cls: typing.Type[C],
    data: dict[str, typing.Any],
    _except: typing.Iterable[str],
    strict: bool = True,
    convert_types: bool = False,
) -> dict[str, typing.Any]:
    """
    Prepare `data` for `load_into_recurse`, driven by the annotations of `cls` (minus `_except`).

    Steps:
        1. `convert_config`: drop None values and make the config-keys python compatible;
        2. `load_recursive`: load custom class type annotations with the same logic;
        3. `ensure_types` (only when `strict`): make sure the annotated types match the actual types.
    """
    annotations = all_annotations(cls, _except=_except)

    cleaned = convert_config(data)
    loaded = load_recursive(cls, cleaned, annotations, convert_types=convert_types)

    if not strict:
        return loaded

    return ensure_types(loaded, annotations, convert_types=convert_types)
527T_init_list = list[typing.Any]
528T_init_dict = dict[str, typing.Any]
529T_init = tuple[T_init_list, T_init_dict] | T_init_list | T_init_dict | None
532@typing.no_type_check # (mypy doesn't understand 'match' fully yet)
533def _split_init(init: T_init) -> tuple[T_init_list, T_init_dict]:
534 """
535 Accept a tuple, a dict or a list of (arg, kwarg), {kwargs: ...}, [args] respectively and turn them all into a tuple.
536 """
537 if not init:
538 return [], {}
540 args: T_init_list = []
541 kwargs: T_init_dict = {}
542 match init:
543 case (args, kwargs):
544 return args, kwargs
545 case [*args]:
546 return args, {}
547 case {**kwargs}:
548 return [], kwargs
549 case _:
550 raise ValueError("Init must be either a tuple of list and dict, a list or a dict.")
def _load_into_recurse(
    cls: typing.Type[C],
    data: dict[str, typing.Any] | bytes,
    init: T_init = None,
    strict: bool = True,
    convert_types: bool = False,
) -> C:
    """
    Create an instance of `cls` and fill it with `data`.

    Depending on `cls` and `data`:
    - bytes and/or a `BinaryConfig` class: parsed with `cls._parse_into`;
    - a dataclass: constructed from the checked data (`init` is not allowed here);
    - data that already is an instance of `cls`: returned as-is;
    - anything else: constructed with the `init` arguments, after which the checked data is
      written to the instance's `__dict__`.

    Uses `load_recursive` (via `check_and_convert_data`) to load any fillable annotated properties
    (see that method for an example).
    `init` can be used to optionally pass extra __init__ arguments. \
        NOTE: This will overwrite a config key with the same name!
    """
    init_args, init_kwargs = _split_init(init)

    if isinstance(data, bytes) or issubclass(cls, BinaryConfig):
        if not isinstance(data, (bytes, dict)):  # pragma: no cover
            raise NotImplementedError("BinaryConfig can only deal with `bytes` or a dict of bytes as input.")
        if not issubclass(cls, BinaryConfig):  # pragma: no cover
            raise NotImplementedError("Only BinaryConfig can be used with `bytes` (or a dict of bytes) as input.")

        return typing.cast(C, cls._parse_into(data))

    if dc.is_dataclass(cls):
        to_load = check_and_convert_data(cls, data, init_kwargs.keys(), strict=strict, convert_types=convert_types)
        if init:
            raise ValueError("Init is not allowed for dataclasses!")

        # the cast tells mypy this is an instance of cls (and not a fictitious `DataclassInstance`)
        return typing.cast(C, cls(**to_load))

    if isinstance(data, cls):
        # already the right type! (e.g. Pathlib)
        return typing.cast(C, data)

    inst = cls(*init_args, **init_kwargs)
    # whatever __init__ already set on the instance is excluded from the annotations to load
    to_load = check_and_convert_data(cls, data, inst.__dict__.keys(), strict=strict, convert_types=convert_types)
    inst.__dict__.update(**to_load)
    return inst
def _load_into_instance(
    inst: C,
    cls: typing.Type[C],
    data: dict[str, typing.Any],
    init: T_init = None,
    strict: bool = True,
    convert_types: bool = False,
) -> C:
    """
    Fill an already existing instance (so after __init__) with `data`.

    Counterpart of `load_into_recurse` for instances; because the instance already exists,
    passing `init` raises a ValueError. Attributes already present in the instance's `__dict__`
    are excluded from the annotations that get loaded.
    """
    if init is not None:
        raise ValueError("Can not init an existing instance!")

    already_set = inst.__dict__.keys()
    to_load = check_and_convert_data(cls, data, _except=already_set, strict=strict, convert_types=convert_types)

    inst.__dict__.update(**to_load)
    return inst
def load_into_class(
    cls: typing.Type[C],
    data: T_data,
    /,
    key: str = None,
    init: T_init = None,
    strict: bool = True,
    lower_keys: bool = False,
    convert_types: bool = False,
    use_env: UseEnvSetting = DEFAULT_ENV_SETTING,
) -> C:
    """
    Load `data` with `load_data` and build a new instance of `cls` from it with `_load_into_recurse`.

    The class name is passed along as fallback for guessing the key.
    """
    if issubclass(cls, BinaryConfig):
        # binary configs may be fed raw bytes
        allow_types: tuple[type, ...] = (dict, bytes)
    else:
        allow_types = (dict,)

    to_load = load_data(
        data,
        key,
        cls.__name__,
        lower_keys=lower_keys,
        allow_types=allow_types,
        strict=strict,
        use_env=use_env,
    )
    return _load_into_recurse(cls, to_load, init=init, strict=strict, convert_types=convert_types)
def load_into_instance(
    inst: C,
    data: T_data,
    /,
    key: str = None,
    init: T_init = None,
    strict: bool = True,
    lower_keys: bool = False,
    convert_types: bool = False,
    use_env: UseEnvSetting = DEFAULT_ENV_SETTING,
) -> C:
    """
    Load `data` with `load_data` and write it into the existing instance with `_load_into_instance`.

    The name of the instance's class is passed along as fallback for guessing the key.
    """
    cls = inst.__class__

    if issubclass(cls, BinaryConfig):
        # binary configs may be fed raw bytes
        allow_types: tuple[type, ...] = (dict, bytes)
    else:
        allow_types = (dict,)

    to_load = load_data(
        data,
        key,
        cls.__name__,
        lower_keys=lower_keys,
        allow_types=allow_types,
        strict=strict,
        use_env=use_env,
    )
    return _load_into_instance(inst, cls, to_load, init=init, strict=strict, convert_types=convert_types)
def load_into(
    cls: typing.Type[C],
    data: T_data = None,
    /,
    key: str = None,
    init: T_init = None,
    strict: bool = True,
    lower_keys: bool = False,
    convert_types: bool = False,
    use_env: UseEnvSetting = DEFAULT_ENV_SETTING,
) -> C:
    """
    Load your config into a class (instance).

    Supports both a class or an instance as first argument, but that's hard to explain to mypy, so officially only
        classes are supported, and if you want to `load_into` an instance, you should use `load_into_instance`.

    After loading, a callable `__post_init__` on the result is invoked, except for dataclasses.

    Args:
        cls: either a class or an existing instance of that class.
        data: can be a dictionary or a path to a file to load (as pathlib.Path or str)
        key: optional (nested) dictionary key to load data from (e.g. 'tool.su6.specific')
        init: optional data to pass to your cls' __init__ method (only if cls is not an instance already)
        strict: enable type checks or allow anything?
        lower_keys: should the config keys be lowercased? (for .env)
        convert_types: should the types be converted to the annotated type if not yet matching? (for .env)
        use_env: Controls how ${VAR} placeholders are resolved.
            Determines which sources are consulted and in what order:

            - "yes" (default): OS environment → .env
            - "inverse": .env → OS environment
            - "dotenv": .env only
            - "environ": OS environment only
            - "no": no interpolation
    """
    # Instances are not supported according to mypy, but load_into(instance) still works:
    # a class gets a fresh instance, anything else is treated as an existing instance to fill.
    loader = load_into_class if isinstance(cls, type) else load_into_instance

    result: C = loader(
        cls,
        data,
        key=key,
        init=init,
        strict=strict,
        lower_keys=lower_keys,
        convert_types=convert_types,
        use_env=use_env,
    )

    post_init = getattr(result, "__post_init__", None)
    if not dc.is_dataclass(result) and callable(post_init):
        post_init()

    return result
class Defaultable:
    """
    Base class to explicitly opt in to 'this class can construct a default instance of itself'.
    """

    @classmethod
    def default(cls) -> typing.Self:
        """
        Build the default instance of `cls` by loading an empty dict into it with `load_into`.
        """
        no_data: dict[str, typing.Any] = {}
        return load_into(cls, no_data)
def is_defaultable(_type: type | typing.Any, with_optional: bool = False) -> type[Defaultable] | None:
    """
    Find the `Defaultable` class behind `_type`, or None if there is none.

    Without `with_optional` (or when `is_optional(_type)` does not hold), `_type` itself is returned
    if it is a custom class deriving from `Defaultable`.
    With `with_optional` and an optional type (e.g. `MyDefaultable | None`), the type arguments are
    inspected and the first `Defaultable` one is returned.
    """
    if not (with_optional and is_optional(_type)):
        if is_custom_class(_type) and issubclass(_type, Defaultable):
            return _type

        return None

    # unpack the union and pick the first member that is Defaultable
    for candidate in typing.get_args(_type):
        if is_custom_class(candidate) and issubclass(candidate, Defaultable):
            return candidate

    return None