Coverage for src/configuraptor/core.py: 100%
204 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-09-20 18:19 +0200
« prev ^ index » next coverage.py v7.2.7, created at 2023-09-20 18:19 +0200
1"""
2Contains most of the loading logic.
3"""
5import dataclasses as dc
6import io
7import os
8import typing
9import warnings
10from pathlib import Path
12import requests
14from . import loaders
15from .abs import C, T, T_data, Type_C
16from .binary_config import BinaryConfig
17from .errors import (
18 ConfigErrorCouldNotConvert,
19 ConfigErrorInvalidType,
20 ConfigErrorMissingKey,
21)
22from .helpers import (
23 all_annotations,
24 camel_to_snake,
25 check_type,
26 dataclass_field,
27 find_pyproject_toml,
28 is_custom_class,
29 is_optional,
30 is_parameterized,
31)
32from .postpone import Postponed
33from .type_converters import CONVERTERS
36def _data_for_nested_key(key: str, raw: dict[str, typing.Any]) -> dict[str, typing.Any]:
37 """
38 If a key contains a dot, traverse the raw dict until the right key was found.
40 Example:
41 key = some.nested.key
42 raw = {"some": {"nested": {"key": {"with": "data"}}}}
43 -> {"with": "data"}
44 """
45 parts = key.split(".")
46 while parts:
47 key = parts.pop(0)
48 if key not in raw:
49 return {}
51 raw = raw[key]
53 return raw
def _guess_key(clsname: str) -> str:
    """
    Derive the default key for `load_into` from a class name.

    Used when no key was defined manually: the name is passed through `camel_to_snake`.
    """
    default_key = camel_to_snake(clsname)
    return default_key
64def _from_mock_url(url: str) -> str:
65 """
66 Pytest only: when starting a url with mock:// it is expected to just be json afterwards.
67 """
68 return url.removeprefix("mock://")
def guess_filetype_for_url(url: str, response: "requests.Response | None" = None) -> str:
    """
    Based on the url (which may have an extension) and the requests response \
    (which may have a content-type), try to guess the right filetype (-> loader, e.g. json or yaml).

    Falls back to JSON if none can be found.

    Args:
        url: the location the data was (or will be) fetched from.
        response: optional response object; only its `headers` mapping is read.

    Returns:
        The lower-cased extension of the url's path (without the dot) if it has one, otherwise the
        lower-cased subtype of the response's content-type (unless that is "plain"), otherwise "json".
    """
    # the query string and fragment are not part of the file name
    location = url.split("?", 1)[0].split("#", 1)[0]

    # Only inspect the path: with a scheme present, everything up to the first "/" after "://" is the
    # host, whose TLD (example.com -> ".com") must not be mistaken for a file extension.
    _scheme, has_scheme, after_scheme = location.partition("://")
    path = after_scheme.partition("/")[2] if has_scheme else location

    if url_extension := os.path.splitext(path)[1].lower():
        return url_extension.strip(".")

    # Compare against None explicitly: a requests.Response is falsy for 4xx/5xx status codes,
    # but its content-type header is still usable.
    if response is not None:
        content_type_header = response.headers.get("content-type", "").split(";")[0].strip()
        content_type = content_type_header.split("/")[-1].lower()
        if content_type and content_type != "plain":
            return content_type

    # If both methods fail, default to JSON
    return "json"
def from_url(url: str, _dummy: bool = False) -> tuple[io.BytesIO, str]:
    """
    Fetch the contents behind a url as a bytes file-like object, together with the guessed file type.

    mock:// urls carry their payload inline and `_dummy` yields an empty json object; in both cases
    no request is made. Anything else is fetched with `requests.get` (10 second timeout).

    The result can be used by __load_data:
    > loader = loaders.get(filetype)
    > # dev/null exists but always returns b''
    > data = loader(contents, Path("/dev/null"))
    """
    # stays None whenever no real request is made
    resp = None

    if url.startswith("mock://"):
        text = _from_mock_url(url)
    elif _dummy:
        text = "{}"
    else:
        resp = requests.get(url, timeout=10)
        text = resp.text

    filetype = guess_filetype_for_url(url, resp)
    contents = io.BytesIO(text.encode())
    return contents, filetype
def __load_data(
    data: T_data,
    key: str = None,
    classname: str = None,
    lower_keys: bool = False,
    allow_types: tuple[type, ...] = (dict,),
) -> dict[str, typing.Any]:
    """
    Resolve `data` (bytes, list of sources, url, filename/path or dict) to the section that should be loaded.

    The section is selected by a manual key or, failing that, by the only top-level key or the class name
    (e.g. class Tool will be mapped to key tool).
    Nested keys are supported as well (tool.extra -> {"tool": {"extra": ...}}).

    Raises:
        ValueError: for an empty list, when the selected section is empty,
            or when it is not an instance of `allow_types`.
    """
    if isinstance(data, bytes):
        # bytes in -> the very same bytes out, untouched.
        # `T_data` is re-used for the return, which is hard to express for mypy.
        return data  # type: ignore

    if isinstance(data, list):
        if not data:
            raise ValueError("Empty list passed!")

        # later sources overwrite top-level keys of earlier ones
        merged: dict[str, typing.Any] = {}
        for source in data:
            merged |= _load_data(source, key=key, classname=classname, lower_keys=True, allow_types=allow_types)

        return merged

    if isinstance(data, str):
        is_remote = data.startswith(("http://", "https://", "mock://"))
        if is_remote:
            stream, kind = from_url(data)

            parse = loaders.get(kind)
            # dev/null exists but always returns b''
            data = parse(stream, Path("/dev/null"))
        else:
            # a plain string is a filename, handled by the Path branch below
            data = Path(data)

    if isinstance(data, Path):
        with data.open("rb") as handle:
            # files without a suffix are looked up by their full name
            parse = loaders.get(data.suffix or data.name)
            data = parse(handle, data.resolve())

    if not data:
        return {}

    if key is None:
        # no manual key: use the only top-level key if there is just one, else derive one from the class name
        if len(data) == 1:
            key = next(iter(data.keys()))
        elif classname is not None:
            key = _guess_key(classname)

    if key:
        data = _data_for_nested_key(key, data)

    if not data:
        raise ValueError("No data found!")

    if not isinstance(data, allow_types):
        raise ValueError(f"Data should be one of {allow_types} but it is {type(data)}!")

    if lower_keys and isinstance(data, dict):
        data = {name.lower(): value for name, value in data.items()}

    return data
def _load_data(
    data: T_data,
    key: str = None,
    classname: str = None,
    lower_keys: bool = False,
    allow_types: tuple[type, ...] = (dict,),
) -> dict[str, typing.Any]:
    """
    Call __load_data and, if that raises, try once more with key="".

    When `data` is None, the result of `find_pyproject_toml()` is used as the source.
    """
    if data is None:
        # nothing passed: fall back to pyproject.toml
        data = find_pyproject_toml()

    try:
        return __load_data(data, key, classname, lower_keys=lower_keys, allow_types=allow_types)
    except Exception as e:
        if key == "":  # pragma: no cover
            # the key already was "", so retrying is pointless: warn and return empty data
            # (will probably not happen but fallback)
            warnings.warn(f"Data could not be loaded: {e}", source=e)
            return {}

        return __load_data(data, "", classname, lower_keys=lower_keys, allow_types=allow_types)
F = typing.TypeVar("F")


def convert_between(from_value: F, from_type: typing.Type[F], to_type: type[T]) -> T:
    """
    Convert a value from one type to another.

    A converter stored in `CONVERTERS` under the (from_type, to_type) pair takes precedence;
    without one, `to_type` is simply called with the value.
    """
    converter = CONVERTERS.get((from_type, to_type))
    if not converter:
        # no dedicated converter: just construct the target type from the value
        return to_type(from_value)  # type: ignore

    return typing.cast(T, converter(from_value))
def ensure_types(
    data: dict[str, T], annotations: dict[str, type[T]], convert_types: bool = False
) -> dict[str, T | None]:
    """
    Check every annotated key in 'data' against its annotation, optionally converting mismatching values.

    Annotated keys that are absent from 'data' are skipped with a warning and values that are the object
    returned by `Postponed()` are skipped silently; neither ends up in the result.

    Raises:
        ConfigErrorInvalidType: a value fails `check_type` and `convert_types` is off.
        ConfigErrorCouldNotConvert: `convert_types` is on but converting raised a TypeError or ValueError.

    TODO: python 3.11 exception groups to throw multiple errors at once!
    """
    # unique marker for "key absent"; None can't be used because it is a valid value for typing.Optional.
    # (cast to T only to make mypy happy)
    missing = typing.cast(T, object())
    postponed = Postponed()

    checked: dict[str, T | None] = {}
    for key, expected in annotations.items():
        value = data.get(key, missing)

        if value is missing:  # pragma: nocover
            warnings.warn(
                "This should not happen since " "`load_recursive` already fills `data` " "based on `annotations`"
            )
            continue

        if value is postponed:
            # postponed items are left alone entirely
            continue

        if not check_type(value, expected):
            if not convert_types:
                raise ConfigErrorInvalidType(key, value=value, expected_type=expected)

            try:
                value = convert_between(value, type(value), expected)
            except (TypeError, ValueError) as e:
                raise ConfigErrorCouldNotConvert(type(value), expected, value) from e

        checked[key] = value

    return checked
def convert_config(items: dict[str, T]) -> dict[str, T]:
    """
    Normalize the config dict (from toml) or 'overwrites' dict.

    - items whose value is None are dropped, so the default gets used for them;
    - '-' and '.' in keys become '_', so the keys can be mapped to the Config properties.
    """
    converted = {}
    for name, value in items.items():
        if value is None:
            continue

        python_name = name.replace("-", "_").replace(".", "_")
        converted[python_name] = value

    return converted
Type = typing.Type[typing.Any]
T_Type = typing.TypeVar("T_Type", bound=Type)


def load_recursive(
    cls: Type, data: dict[str, T], annotations: dict[str, Type], convert_types: bool = False
) -> dict[str, T]:
    """
    Resolve the tree of annotations (recursively gathered from parents with `all_annotations`) against `data`.

    Values annotated with a custom class - directly, or inside a list, dict or Union - are loaded through
    `_load_into_recurse`; this function does not call itself directly.
    Keys missing from `data` get, in order of preference: the class attribute with the same name,
    None if the annotation is optional, or the result of a dataclass field's default factory.

    Example:
        class First:
            key: str

        class Second:
            other: First

        # step 1
        cls = Second
        data = {"second": {"other": {"key": "anything"}}}
        annotations: {"other": First}

        # step 1.5
        data = {"other": {"key": "anything"}
        annotations: {"other": First}

        # step 2
        cls = First
        data = {"key": "anything"}
        annotations: {"key": str}

    Raises:
        ConfigErrorMissingKey: a key is not in `data` and none of the fallbacks applies.

    TODO: python 3.11 exception groups to throw multiple errors at once!
    """
    resolved = {}

    for name, annotation in annotations.items():
        if name not in data:
            # nothing supplied for this key: look for a fallback
            if name in cls.__dict__:
                # property has a default on the class, use that
                resolved[name] = cls.__dict__[name]
            elif is_optional(annotation):
                # optional and no class-level default -> None
                resolved[name] = None
            elif (
                dc.is_dataclass(cls)
                and (field := dataclass_field(cls, name))
                and field.default_factory is not dc.MISSING
            ):
                # todo: do something with field.default?
                resolved[name] = field.default_factory()
            else:
                raise ConfigErrorMissingKey(name, cls, annotation)

            continue

        # the value may be replaced by loaded instances below, so it is Any rather than T
        value: typing.Any = data[name]

        if is_parameterized(annotation):
            origin = typing.get_origin(annotation)
            arguments = typing.get_args(annotation)

            if origin is list and arguments and is_custom_class(arguments[0]):
                # e.g. list[Point]
                item_type = arguments[0]
                value = [_load_into_recurse(item_type, item, convert_types=convert_types) for item in value]
            elif origin is dict and arguments and is_custom_class(arguments[1]):
                # e.g. dict[str, Point]; the key type is not a custom class, so keys are kept as they are
                _key_type, item_type = arguments
                value = {
                    item_key: _load_into_recurse(item_type, item, convert_types=convert_types)
                    for item_key, item in value.items()
                }
            elif origin is typing.Union and arguments:
                # members that are not custom classes are ignored
                for member in arguments:
                    if not is_custom_class(member):
                        continue

                    value = _load_into_recurse(member, value, convert_types=convert_types)

            # todo: other parameterized/unions/typing.Optional
            # (any other parameterized type, e.g. a plain dict[str, str], keeps its data as is)
        elif is_custom_class(annotation):
            # the cast only tells mypy and pycharm that the annotation is of type C;
            # at runtime the annotation itself is passed as first argument.
            value = _load_into_recurse(
                typing.cast(Type_C[typing.Any], annotation),
                value,
                convert_types=convert_types,
            )

        resolved[name] = value

    return resolved
def check_and_convert_data(
    cls: typing.Type[C],
    data: dict[str, typing.Any],
    _except: typing.Iterable[str],
    strict: bool = True,
    convert_types: bool = False,
) -> dict[str, typing.Any]:
    """
    Prepare the data for `load_into_recurse`, based on the annotations of the class.

    1. the annotations are gathered with `all_annotations`, passing `_except` along
    2. config-keys are converted to python compatible config_keys (`convert_config`)
    3. custom class type annotations are loaded with the same logic (see also `load_recursive`)
    4. only if `strict`: `ensure_types` checks the loaded values against the annotated types
    """
    annotations = all_annotations(cls, _except=_except)

    prepared = load_recursive(cls, convert_config(data), annotations, convert_types=convert_types)
    if not strict:
        return prepared

    return ensure_types(prepared, annotations, convert_types=convert_types)
397T_init_list = list[typing.Any]
398T_init_dict = dict[str, typing.Any]
399T_init = tuple[T_init_list, T_init_dict] | T_init_list | T_init_dict | None
402@typing.no_type_check # (mypy doesn't understand 'match' fully yet)
403def _split_init(init: T_init) -> tuple[T_init_list, T_init_dict]:
404 """
405 Accept a tuple, a dict or a list of (arg, kwarg), {kwargs: ...}, [args] respectively and turn them all into a tuple.
406 """
407 if not init:
408 return [], {}
410 args: T_init_list = []
411 kwargs: T_init_dict = {}
412 match init:
413 case (args, kwargs):
414 return args, kwargs
415 case [*args]:
416 return args, {}
417 case {**kwargs}:
418 return [], kwargs
419 case _:
420 raise ValueError("Init must be either a tuple of list and dict, a list or a dict.")
def _load_into_recurse(
    cls: typing.Type[C],
    data: dict[str, typing.Any] | bytes,
    init: T_init = None,
    strict: bool = True,
    convert_types: bool = False,
) -> C:
    """
    Create an instance of `cls` and fill it with `data`.

    Fillable annotated properties are loaded via `load_recursive` (see that method for an example).
    `init` can optionally supply extra __init__ arguments (not allowed for dataclasses).
    NOTE: This will overwrite a config key with the same name!
    """
    init_args, init_kwargs = _split_init(init)

    if isinstance(data, bytes) or issubclass(cls, BinaryConfig):
        # binary route: input and class have to agree before parsing
        if not isinstance(data, (bytes, dict)):  # pragma: no cover
            raise NotImplementedError("BinaryConfig can only deal with `bytes` or a dict of bytes as input.")
        elif not issubclass(cls, BinaryConfig):  # pragma: no cover
            raise NotImplementedError("Only BinaryConfig can be used with `bytes` (or a dict of bytes) as input.")

        return typing.cast(C, cls._parse_into(data))

    if dc.is_dataclass(cls):
        fields = check_and_convert_data(cls, data, init_kwargs.keys(), strict=strict, convert_types=convert_types)
        if init:
            raise ValueError("Init is not allowed for dataclasses!")

        # the cast ensures mypy sees an instance of the cls type (and not a fictitious `DataclassInstance`)
        return typing.cast(C, cls(**fields))

    # regular class: construct first, then fill in everything __init__ did not already set
    inst = cls(*init_args, **init_kwargs)
    fields = check_and_convert_data(cls, data, inst.__dict__.keys(), strict=strict, convert_types=convert_types)
    inst.__dict__.update(**fields)
    return inst
def _load_into_instance(
    inst: C,
    cls: typing.Type[C],
    data: dict[str, typing.Any],
    init: T_init = None,
    strict: bool = True,
    convert_types: bool = False,
) -> C:
    """
    Like `load_into_recurse`, but fills an already existing instance (so after __init__).

    Because the instance exists already, `init` is not supported: anything but None raises a ValueError.
    Attributes already present in the instance's __dict__ are excluded from loading.
    """
    if init is not None:
        raise ValueError("Can not init an existing instance!")

    already_set = inst.__dict__.keys()
    loaded = check_and_convert_data(cls, data, _except=already_set, strict=strict, convert_types=convert_types)

    inst.__dict__.update(**loaded)
    return inst
def load_into_class(
    cls: typing.Type[C],
    data: T_data,
    /,
    key: str = None,
    init: T_init = None,
    strict: bool = True,
    lower_keys: bool = False,
    convert_types: bool = False,
) -> C:
    """
    Shortcut for _load_data + load_into_recurse.

    Subclasses of BinaryConfig may be loaded from bytes as well as from a dict; other classes from a dict only.
    """
    if issubclass(cls, BinaryConfig):
        allow_types = (dict, bytes)
    else:
        allow_types = (dict,)

    loaded = _load_data(data, key, cls.__name__, lower_keys=lower_keys, allow_types=allow_types)
    return _load_into_recurse(cls, loaded, init=init, strict=strict, convert_types=convert_types)
def load_into_instance(
    inst: C,
    data: T_data,
    /,
    key: str = None,
    init: T_init = None,
    strict: bool = True,
    lower_keys: bool = False,
    convert_types: bool = False,
) -> C:
    """
    Shortcut for _load_data + _load_into_instance.

    The class is taken from the instance itself; instances of BinaryConfig subclasses may be loaded
    from bytes as well as from a dict, others from a dict only.
    """
    cls = inst.__class__

    if issubclass(cls, BinaryConfig):
        allow_types = (dict, bytes)
    else:
        allow_types = (dict,)

    loaded = _load_data(data, key, cls.__name__, lower_keys=lower_keys, allow_types=allow_types)
    return _load_into_instance(inst, cls, loaded, init=init, strict=strict, convert_types=convert_types)
def load_into(
    cls: typing.Type[C],
    data: T_data = None,
    /,
    key: str = None,
    init: T_init = None,
    strict: bool = True,
    lower_keys: bool = False,
    convert_types: bool = False,
) -> C:
    """
    Load your config into a class (instance).

    The first argument may be a class or an instance. Since that is hard to explain to mypy, only classes are
    officially supported; to `load_into` an instance, `load_into_instance` is the typed way to do it.

    Args:
        cls: either a class or an existing instance of that class.
        data: can be a dictionary or a path to a file to load (as pathlib.Path or str)
        key: optional (nested) dictionary key to load data from (e.g. 'tool.su6.specific')
        init: optional data to pass to your cls' __init__ method (only if cls is not an instance already)
        strict: enable type checks or allow anything?
        lower_keys: should the config keys be lowercased? (for .env)
        convert_types: should the types be converted to the annotated type if not yet matching? (for .env)

    """
    if isinstance(cls, type):
        # a real class: mypy and pycharm already know it is typing.Type[C], no cast required
        return load_into_class(
            cls, data, key=key, init=init, strict=strict, lower_keys=lower_keys, convert_types=convert_types
        )

    # an instance was passed: not supported according to mypy, but it works at runtime
    return load_into_instance(
        cls, data, key=key, init=init, strict=strict, lower_keys=lower_keys, convert_types=convert_types
    )