Coverage for src/configuraptor/core.py: 100%
243 statements
« prev ^ index » next coverage.py v7.2.7, created at 2026-02-11 11:46 +0100
« prev ^ index » next coverage.py v7.2.7, created at 2026-02-11 11:46 +0100
1"""
2Contains most of the loading logic.
3"""
5import dataclasses as dc
6import io
7import os
8import typing
9import warnings
10from pathlib import Path
11from typing import Any, Type
13import requests
14from dotenv import dotenv_values as _dotenv_values
15from dotenv import find_dotenv
17from . import loaders
18from .abs import DEFAULT_ENV_SETTING, AnyType, C, T, T_data, Type_C, UseEnvSetting
19from .alias import Alias, has_alias
20from .binary_config import BinaryConfig
21from .errors import (
22 ConfigErrorCouldNotConvert,
23 ConfigErrorInvalidType,
24 ConfigErrorMissingKey,
25 FailedToLoad,
26)
27from .helpers import (
28 all_annotations,
29 camel_to_snake,
30 check_type,
31 dataclass_field,
32 expand_env_vars_into_toml_values,
33 find_pyproject_toml,
34 is_custom_class,
35 is_optional,
36 is_parameterized,
37 is_union,
38)
39from .postpone import Postponed
40from .type_converters import CONVERTERS
43def _data_for_nested_key(key: str, raw: dict[str, typing.Any]) -> dict[str, typing.Any]:
44 """
45 If a key contains a dot, traverse the raw dict until the right key was found.
47 Example:
48 key = some.nested.key
49 raw = {"some": {"nested": {"key": {"with": "data"}}}}
50 -> {"with": "data"}
51 """
52 parts = key.split(".")
53 while parts:
54 key = parts.pop(0)
55 if key not in raw:
56 return {}
58 raw = raw[key]
60 return raw
def _guess_key(clsname: str) -> str:
    """
    Derive the default key for `load_into` from a class name.

    Used when no key was defined manually: the class' name is run through `camel_to_snake`.
    """
    default_key = camel_to_snake(clsname)
    return default_key
71def _from_mock_url(url: str) -> str:
72 """
73 Pytest only: when starting a url with mock:// it is expected to just be json afterwards.
74 """
75 return url.removeprefix("mock://")
def guess_filetype_for_url(url: str, response: "requests.Response | None" = None) -> str:
    """
    Guess the filetype (-> loader, e.g. json or yaml) for data that is loaded from a url.

    The extension of the url's path is tried first, then the content-type of the response.

    Args:
        url: the location the data comes from; query string and fragment are ignored.
        response: optional response whose 'content-type' header is used when the url has no extension.

    Returns:
        The extension without leading dot (lowercased), else the content-type's subtype
        (unless that is 'plain'), else "json" as fallback.
    """
    # the query string and the fragment are not part of the file name
    location = url.split("?", 1)[0].split("#", 1)[0]
    if "://" in location:
        # Only the path may carry an extension: drop scheme and host,
        # otherwise 'https://example.com' would be detected as filetype 'com'.
        location = location.partition("://")[2].partition("/")[2]

    if url_extension := os.path.splitext(location)[1].lower():
        return url_extension.strip(".")

    if response and (content_type_header := response.headers.get("content-type", "").split(";")[0].strip()):
        # 'application/yaml' -> 'yaml'
        content_type = content_type_header.split("/")[-1]
        if content_type != "plain":
            return content_type

    # If both methods fail, default to JSON
    return "json"
def from_url(url: str, _dummy: bool = False) -> tuple[io.BytesIO, str]:
    """
    Fetch the contents behind `url` into a bytes buffer and guess its file type.

    The result can be used by _load_data:
    > loader = loaders.get(filetype)
    > # dev/null exists but always returns b''
    > data = loader(contents, Path("/dev/null"))

    mock:// urls carry their payload in the url itself and `_dummy` yields an empty json object;
    in both cases no request is made. Otherwise the url is fetched with a 10 second timeout, where
    certificate verification is on unless the SSL_VERIFY environment variable is set to something other than "1".
    """
    response = None
    if url.startswith("mock://"):
        text = _from_mock_url(url)
    elif _dummy:
        text = "{}"
    else:
        verify_certificates = os.getenv("SSL_VERIFY", "1") == "1"

        response = requests.get(url, timeout=10, verify=verify_certificates)
        text = response.text

    filetype = guess_filetype_for_url(url, response)
    buffer = io.BytesIO(text.encode())
    return buffer, filetype
def dotenv_values() -> dict[str, str | None]:
    """Wrapper around dotenv.dotenv_values that reads the .env located by `find_dotenv(usecwd=True)`."""
    env_file = find_dotenv(usecwd=True)
    return _dotenv_values(dotenv_path=env_file)
def apply_env(data: dict[str, typing.Any], use_env: UseEnvSetting) -> None:
    """
    Expand environment variables into `data` (in place), using the sources selected by `use_env`.

    - "yes": .env values, overridden by the OS environment
    - "inverse": OS environment, overridden by .env values
    - "dotenv": .env values only
    - "environ": OS environment only

    Any other setting leaves `data` untouched.
    """
    if use_env == "yes":
        variables = dotenv_values() | os.environ
    elif use_env == "inverse":
        variables = os.environ | dotenv_values()
    elif use_env == "dotenv":
        variables = dotenv_values()
    elif use_env == "environ":
        variables = {**os.environ}
    else:  # pragma: no cover
        return

    expand_env_vars_into_toml_values(data, variables)
def _load_data(
    data: T_data,
    key: str = None,
    classname: str = None,
    lower_keys: bool = False,
    allow_types: tuple[type, ...] = (dict,),
    strict: bool = False,
    use_env: UseEnvSetting = DEFAULT_ENV_SETTING,
) -> dict[str, typing.Any]:
    """
    Tries to load the right data from a filename/path, url, list of sources or dict, \
    based on a manual key or a classname.

    E.g. class Tool will be mapped to key tool.
    It also deals with nested keys (tool.extra -> {"tool": {"extra": ...}})

    Args:
        data: bytes (returned untouched), a list of sources (loaded one by one and merged),
            a str (url or file path), a Path or already loaded data.
        key: (dotted) key to select from the loaded data. When None, it is guessed: the only top-level
            key if there is exactly one, otherwise the key derived from `classname` (if given).
        classname: class name used to guess the key.
        lower_keys: lowercase the keys of the selected dict.
        allow_types: types the selected data may have.
        strict: only forwarded to `load_data` for the items of a list.
        use_env: env setting passed to `apply_env`; "no" skips it.

    Returns:
        The selected data; an empty dict if the source contained nothing at all.

    Raises:
        ValueError: for an empty list, when nothing is found under the key or
            when the result is not one of `allow_types`.
    """
    if isinstance(data, bytes):
        # instantly return, don't modify
        # bytes as inputs -> bytes as output
        # but since `T_data` is re-used, that's kind of hard to type for mypy.
        return data  # type: ignore

    if isinstance(data, list):
        if not data:
            raise ValueError("Empty list passed!")

        # every source goes through the public wrapper; later sources overwrite keys of earlier ones
        final_data: dict[str, typing.Any] = {}
        for source in data:
            final_data |= load_data(
                source,
                key=key,
                classname=classname,
                # NOTE(review): always True here, the `lower_keys` argument is not passed on;
                # presumably so keys of different sources line up when merging - confirm.
                lower_keys=True,
                allow_types=allow_types,
                strict=strict,
                use_env=use_env,
            )

        return final_data

    if isinstance(data, str):
        if data.startswith(("http://", "https://", "mock://")):
            contents, filetype = from_url(data)

            loader = loaders.get(filetype)
            # dev/null exists but always returns b''
            data = loader(contents, Path("/dev/null"))
        else:
            # any other string is treated as a file path and handled by the Path branch below
            data = Path(data)

    if isinstance(data, Path):
        with data.open("rb") as f:
            # files without suffix (`data.suffix` is empty) are looked up by their full name
            loader = loaders.get(data.suffix or data.name)
            data = loader(f, data.resolve())

    if not data:
        # nothing loaded, so there is nothing to select a key from
        return {}

    if key is None:
        # try to guess key by grabbing the first one or using the class name
        if len(data) == 1:
            key = next(iter(data.keys()))
        elif classname is not None:
            key = _guess_key(classname)

    # an empty key ("") means: use the data as a whole
    if key:
        data = _data_for_nested_key(key, data)

    if not data:
        raise ValueError("No data found!")

    if not isinstance(data, allow_types):
        raise ValueError(f"Data should be one of {allow_types} but it is {type(data)}!")

    if lower_keys and isinstance(data, dict):
        data = {k.lower(): v for k, v in data.items()}

    if use_env != "no" and isinstance(data, dict):
        apply_env(data, use_env)

    return typing.cast(dict[str, typing.Any], data)
def load_data(
    data: T_data,
    key: str = None,
    classname: str = None,
    lower_keys: bool = False,
    allow_types: tuple[type, ...] = (dict,),
    strict: bool = False,
    use_env: UseEnvSetting = DEFAULT_ENV_SETTING,
) -> dict[str, typing.Any]:
    """
    Wrapper around _load_data that retries with key="" if anything goes wrong.

    Without `data`, the result of `find_pyproject_toml()` is used as source.
    If the retry with key="" fails as well, `strict` decides between raising `FailedToLoad`
    and emitting a warning + returning an empty dict.
    """
    if data is None:
        # try to load pyproject.toml
        data = find_pyproject_toml()

    options: dict[str, typing.Any] = {
        "lower_keys": lower_keys,
        "allow_types": allow_types,
        "strict": strict,
        "use_env": use_env,
    }

    try:
        return _load_data(data, key, classname, **options)
    except Exception as e:
        # NOTE: `key != ""` is NOT the same as `not key` (None should still get a retry)
        if key != "":
            # try again with key ""
            return load_data(data, "", classname, **options)

        if strict:
            raise FailedToLoad(data) from e

        # e.g. if settings are to be loaded via a URL that is unavailable or returns invalid json
        warnings.warn(f"Data ('{data!r}') could not be loaded", source=e, category=UserWarning)
        return {}
F = typing.TypeVar("F")


def convert_between(from_value: F, from_type: Type[F], to_type: Type[T]) -> T:
    """
    Turn `from_value` (of type `from_type`) into a `to_type`.

    A converter registered in CONVERTERS for this (from, to) pair takes precedence;
    without one, the target type is simply called with the value.
    """
    converter = CONVERTERS.get((from_type, to_type))
    if not converter:
        # default: just convert type:
        return to_type(from_value)  # type: ignore

    return typing.cast(T, converter(from_value))
def check_and_convert_type(value: Any, _type: Type[T], convert_types: bool, key: str = "variable") -> T:
    """
    Return `value` if it matches `_type`, otherwise try to convert it (when allowed).

    Args:
        value (Any): The value to be checked and potentially converted.
        _type (Type[T]): The expected type for the value.
        convert_types (bool): If True, a value of the wrong type is converted instead of rejected.
        key (str, optional): The name or key associated with the variable (used in error messages).
            Defaults to "variable".

    Returns:
        T: The value, potentially converted to the expected type.
            An unresolved alias for an optional type results in None.

    Raises:
        ConfigErrorInvalidType: If the type does not match, and type conversion is not allowed
            (or the value is an unresolved alias for a non-optional type).
        ConfigErrorCouldNotConvert: If type conversion fails.
    """
    if check_type(value, _type):
        # already fine, nothing to do
        return value

    if isinstance(value, Alias):
        if not is_optional(_type):
            # unresolved alias, error should've already been thrown for parent but lets do it again:
            raise ConfigErrorInvalidType(value.to, value=value, expected_type=_type)

        return typing.cast(T, None)

    if not convert_types:
        # mismatch, and converting is not wanted
        raise ConfigErrorInvalidType(key, value=value, expected_type=_type)

    # mismatch, but a conversion may be attempted
    source_type = type(value)
    try:
        return convert_between(value, source_type, _type)
    except (TypeError, ValueError) as e:
        raise ConfigErrorCouldNotConvert(type(value), _type, value) from e
def ensure_types(
    data: dict[str, T],
    annotations: dict[str, type[T]],
    convert_types: bool = False,
) -> dict[str, T | None]:
    """
    Check (and optionally convert) every annotated value in 'data' against its type in 'annotations'.

    Postponed values are left out of the result; an alias is replaced by the value of the key
    it points to, when that key is present in 'data'.

    TODO: python 3.11 exception groups to throw multiple errors at once!
    """
    # unique marker for 'key is absent', since None is a legit value for typing.Optional!
    # cast to T to make mypy happy
    missing = typing.cast(T, object())

    checked: dict[str, T | None] = {}
    for name, expected in annotations.items():
        current = data.get(name, missing)
        if current is missing:  # pragma: nocover
            warnings.warn("This should not happen since `load_recursive` already fills `data` based on `annotations`")
            # skip!
            continue

        if isinstance(current, Postponed):
            # postponed items are not touched at all
            continue

        if isinstance(current, Alias):
            target = data.get(current.to, missing)
            if isinstance(target, Postponed):
                # an alias of a postponed item is postponed as well
                continue

            if target is not missing:
                # the original key is set, so the alias takes over its value
                current = target

        checked[name] = check_and_convert_type(current, expected, convert_types, name)

    return checked
def convert_key(key: str) -> str:
    """
    Make a config key usable as Config property name: every '-' and '.' becomes '_'.
    """
    separators_to_underscore = str.maketrans("-.", "__")
    return key.translate(separators_to_underscore)
def convert_config(items: dict[str, T]) -> dict[str, T]:
    """
    Prepare the config dict (from toml) or 'overwrites' dict for mapping onto the Config properties.

    1. items with value None are dropped, since in that case the default should be used;
    2. the remaining keys are passed through `convert_key` ('-' and '.' become '_').
    """
    converted: dict[str, T] = {}
    for raw_key, value in items.items():
        if value is None:
            continue

        converted[convert_key(raw_key)] = value

    return converted
def load_recursive(
    cls: AnyType,
    data: dict[str, T],
    annotations: dict[str, AnyType],
    convert_types: bool = False,
) -> dict[str, T]:
    """
    For all annotations (recursively gathered from parents with `all_annotations`), \
    try to resolve the tree of annotations.

    Uses `load_into_recurse`, not itself directly.

    Example:
        class First:
            key: str

        class Second:
            other: First

        # step 1
        cls = Second
        data = {"second": {"other": {"key": "anything"}}}
        annotations: {"other": First}

        # step 1.5
        data = {"other": {"key": "anything"}
        annotations: {"other": First}

        # step 2
        cls = First
        data = {"key": "anything"}
        annotations: {"key": str}

    Args:
        cls: the class the data is loaded for (consulted for aliases and defaults).
        data: the values to load; custom classes are still in their raw form here.
        annotations: property name -> annotated type.
        convert_types: passed on to the nested `_load_into_recurse` calls.

    Returns:
        A new dict with one entry per annotation. For a key that is missing from `data`,
        the first match of: alias value, class-level default, None (optional types), dataclass default_factory.

    Raises:
        ConfigErrorMissingKey: if a key is missing from `data` and none of the fallbacks applies.

    TODO: python 3.11 exception groups to throw multiple errors at once!
    """
    updated = {}

    for _key, _type in annotations.items():
        if _key in data:
            value: typing.Any = data[_key]  # value can change so define it as any instead of T
            if is_parameterized(_type):
                origin = typing.get_origin(_type)
                arguments = typing.get_args(_type)
                if origin is list and arguments and is_custom_class(arguments[0]):
                    # e.g. list[Point]: load every item into the custom class
                    subtype = arguments[0]
                    value = [_load_into_recurse(subtype, subvalue, convert_types=convert_types) for subvalue in value]

                elif origin is dict and arguments and is_custom_class(arguments[1]):
                    # e.g. dict[str, Point]
                    subkeytype, subvaluetype = arguments
                    # subkey(type) is not a custom class, so don't try to convert it:
                    value = {
                        subkey: _load_into_recurse(subvaluetype, subvalue, convert_types=convert_types)
                        for subkey, subvalue in value.items()
                    }
                # elif origin is dict:
                # keep data the same
                elif is_union(_type) and arguments:
                    # NOTE(review): there is no break, so with multiple custom classes in one union
                    # the result of the first load is fed into the next - confirm this is intended.
                    for arg in arguments:
                        if is_custom_class(arg):
                            value = _load_into_recurse(arg, value, convert_types=convert_types)

            elif is_custom_class(_type):
                # type must be C (custom class) at this point; includes dataclass but not optional[cls]
                value = _load_into_recurse(
                    # make mypy and pycharm happy by telling it _type is of type C...
                    # actually just passing _type as first arg!
                    typing.cast(Type_C[typing.Any], _type),
                    value,
                    convert_types=convert_types,
                )

            # else: normal value, don't change

        # NOTE(review): the walrus only takes this branch for a truthy result,
        # so a falsy alias value (0, "", False) falls through to the defaults below - confirm.
        elif value := has_alias(cls, _key, data):
            # value updated by alias
            ...
        elif _key in cls.__dict__:
            # property has default, use that instead.
            value = cls.__dict__[_key]
        elif is_optional(_type):
            # type is optional and not found in __dict__ -> default is None
            value = None
        elif dc.is_dataclass(cls) and (field := dataclass_field(cls, _key)) and field.default_factory is not dc.MISSING:
            # could have a default factory
            # todo: do something with field.default?
            value = field.default_factory()
        else:
            raise ConfigErrorMissingKey(_key, cls, _type)

        updated[_key] = value

    return updated
def check_and_convert_data(
    cls: typing.Type[C],
    data: dict[str, typing.Any],
    _except: typing.Iterable[str],
    strict: bool = True,
    convert_types: bool = False,
) -> dict[str, typing.Any]:
    """
    Prepare the data for `load_into_recurse`, guided by the annotations of `cls` (minus those in `_except`).

    1. config-keys are converted to python compatible config_keys (`convert_config`)
    2. custom class type annotations are loaded with the same logic (see also `load_recursive`)
    3. in strict mode, `ensure_types` makes sure the loaded values match the annotated types.
    """
    annotations = all_annotations(cls, _except=_except)

    loaded = load_recursive(cls, convert_config(data), annotations, convert_types=convert_types)
    if not strict:
        return loaded

    return ensure_types(loaded, annotations, convert_types=convert_types)
514T_init_list = list[typing.Any]
515T_init_dict = dict[str, typing.Any]
516T_init = tuple[T_init_list, T_init_dict] | T_init_list | T_init_dict | None
519@typing.no_type_check # (mypy doesn't understand 'match' fully yet)
520def _split_init(init: T_init) -> tuple[T_init_list, T_init_dict]:
521 """
522 Accept a tuple, a dict or a list of (arg, kwarg), {kwargs: ...}, [args] respectively and turn them all into a tuple.
523 """
524 if not init:
525 return [], {}
527 args: T_init_list = []
528 kwargs: T_init_dict = {}
529 match init:
530 case (args, kwargs):
531 return args, kwargs
532 case [*args]:
533 return args, {}
534 case {**kwargs}:
535 return [], kwargs
536 case _:
537 raise ValueError("Init must be either a tuple of list and dict, a list or a dict.")
def _load_into_recurse(
    cls: typing.Type[C],
    data: dict[str, typing.Any] | bytes,
    init: T_init = None,
    strict: bool = True,
    convert_types: bool = False,
) -> C:
    """
    Create an instance of `cls` and fill it with `data`.

    Fillable annotated properties are resolved via `load_recursive` (see that method for an example).
    `init` can be used to optionally pass extra __init__ arguments. \
    NOTE: This will overwrite a config key with the same name!

    Raises:
        NotImplementedError: when bytes and BinaryConfig are not used together.
        ValueError: when `init` is passed for a dataclass.
    """
    init_args, init_kwargs = _split_init(init)

    binary_input = isinstance(data, bytes)
    if binary_input or issubclass(cls, BinaryConfig):
        if not isinstance(data, (bytes, dict)):  # pragma: no cover
            raise NotImplementedError("BinaryConfig can only deal with `bytes` or a dict of bytes as input.")
        if not issubclass(cls, BinaryConfig):  # pragma: no cover
            raise NotImplementedError("Only BinaryConfig can be used with `bytes` (or a dict of bytes) as input.")

        return typing.cast(C, cls._parse_into(data))

    if dc.is_dataclass(cls):
        fields = check_and_convert_data(cls, data, init_kwargs.keys(), strict=strict, convert_types=convert_types)
        if init:
            raise ValueError("Init is not allowed for dataclasses!")

        # tell mypy the result is an instance of the cls type (and not a fictuous `DataclassInstance`)
        return typing.cast(C, cls(**fields))

    if isinstance(data, cls):
        # nothing to load, it is the right type already! (e.g. Pathlib)
        return typing.cast(C, data)

    instance = cls(*init_args, **init_kwargs)
    # whatever __init__ has set on the instance is excluded from loading
    fields = check_and_convert_data(cls, data, instance.__dict__.keys(), strict=strict, convert_types=convert_types)
    instance.__dict__.update(**fields)
    return instance
def _load_into_instance(
    inst: C,
    cls: typing.Type[C],
    data: dict[str, typing.Any],
    init: T_init = None,
    strict: bool = True,
    convert_types: bool = False,
) -> C:
    """
    Like `load_into_recurse`, but fills an already existing instance (so after __init__) \
    which means init is not supported.

    Properties that are already present in the instance's __dict__ are excluded from loading.

    Raises:
        ValueError: if `init` is passed anyway.
    """
    if init is not None:
        raise ValueError("Can not init an existing instance!")

    already_set = inst.__dict__.keys()
    to_load = check_and_convert_data(cls, data, _except=already_set, strict=strict, convert_types=convert_types)

    inst.__dict__.update(**to_load)
    return inst
def load_into_class(
    cls: typing.Type[C],
    data: T_data,
    /,
    key: str = None,
    init: T_init = None,
    strict: bool = True,
    lower_keys: bool = False,
    convert_types: bool = False,
    use_env: UseEnvSetting = DEFAULT_ENV_SETTING,
) -> C:
    """
    Shortcut for load_data + load_into_recurse.

    The class name is passed along so the key can be guessed; only BinaryConfig classes may receive bytes.
    """
    allow_types: tuple[type, ...]
    if issubclass(cls, BinaryConfig):
        allow_types = (dict, bytes)
    else:
        allow_types = (dict,)

    loaded = load_data(
        data,
        key,
        cls.__name__,
        lower_keys=lower_keys,
        allow_types=allow_types,
        strict=strict,
        use_env=use_env,
    )
    return _load_into_recurse(cls, loaded, init=init, strict=strict, convert_types=convert_types)
def load_into_instance(
    inst: C,
    data: T_data,
    /,
    key: str = None,
    init: T_init = None,
    strict: bool = True,
    lower_keys: bool = False,
    convert_types: bool = False,
    use_env: UseEnvSetting = DEFAULT_ENV_SETTING,
) -> C:
    """
    Shortcut for load_data + _load_into_instance.

    The name of the instance's class is passed along so the key can be guessed;
    only instances of a BinaryConfig class may receive bytes.
    """
    cls = inst.__class__

    allow_types: tuple[type, ...]
    if issubclass(cls, BinaryConfig):
        allow_types = (dict, bytes)
    else:
        allow_types = (dict,)

    loaded = load_data(
        data,
        key,
        cls.__name__,
        lower_keys=lower_keys,
        allow_types=allow_types,
        strict=strict,
        use_env=use_env,
    )
    return _load_into_instance(inst, cls, loaded, init=init, strict=strict, convert_types=convert_types)
def load_into(
    cls: typing.Type[C],
    data: T_data = None,
    /,
    key: str = None,
    init: T_init = None,
    strict: bool = True,
    lower_keys: bool = False,
    convert_types: bool = False,
    use_env: UseEnvSetting = DEFAULT_ENV_SETTING,
) -> C:
    """
    Load your config into a class (instance).

    Both a class and an instance work as first argument. Since that's hard to explain to mypy, officially only
    classes are supported; use `load_into_instance` if you want to fill an existing instance.

    Args:
        cls: either a class or an existing instance of that class.
        data: can be a dictionary or a path to a file to load (as pathlib.Path or str)
        key: optional (nested) dictionary key to load data from (e.g. 'tool.su6.specific')
        init: optional data to pass to your cls' __init__ method (only if cls is not an instance already)
        strict: enable type checks or allow anything?
        lower_keys: should the config keys be lowercased? (for .env)
        convert_types: should the types be converted to the annotated type if not yet matching? (for .env)
        use_env: Controls how ${VAR} placeholders are resolved.
            Determines which sources are consulted and in what order:

            - "yes" (default): OS environment → .env
            - "inverse": .env → OS environment
            - "dotenv": .env only
            - "environ": OS environment only
            - "no": no interpolation
    """
    options: dict[str, typing.Any] = {
        "key": key,
        "init": init,
        "strict": strict,
        "lower_keys": lower_keys,
        "convert_types": convert_types,
        "use_env": use_env,
    }

    if isinstance(cls, type):
        return load_into_class(cls, data, **options)

    # an instance is not supported according to mypy, but load_into(instance) still works
    return load_into_instance(cls, data, **options)