Coverage for src/configuraptor/core.py: 100%

204 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-11-09 11:17 +0100

1""" 

2Contains most of the loading logic. 

3""" 

4 

5import dataclasses as dc 

6import io 

7import os 

8import typing 

9import warnings 

10from pathlib import Path 

11 

12import requests 

13 

14from . import loaders 

15from .abs import C, T, T_data, Type_C 

16from .binary_config import BinaryConfig 

17from .errors import ( 

18 ConfigErrorCouldNotConvert, 

19 ConfigErrorInvalidType, 

20 ConfigErrorMissingKey, 

21) 

22from .helpers import ( 

23 all_annotations, 

24 camel_to_snake, 

25 check_type, 

26 dataclass_field, 

27 find_pyproject_toml, 

28 is_custom_class, 

29 is_optional, 

30 is_parameterized, 

31) 

32from .postpone import Postponed 

33from .type_converters import CONVERTERS 

34 

35 

36def _data_for_nested_key(key: str, raw: dict[str, typing.Any]) -> dict[str, typing.Any]: 

37 """ 

38 If a key contains a dot, traverse the raw dict until the right key was found. 

39 

40 Example: 

41 key = some.nested.key 

42 raw = {"some": {"nested": {"key": {"with": "data"}}}} 

43 -> {"with": "data"} 

44 """ 

45 parts = key.split(".") 

46 while parts: 

47 key = parts.pop(0) 

48 if key not in raw: 

49 return {} 

50 

51 raw = raw[key] 

52 

53 return raw 

54 

55 

def _guess_key(clsname: str) -> str:
    """
    Derive the default config key for a class name.

    Used when no key is manually defined for `load_into`: the class' name is passed through `camel_to_snake`.
    """
    guessed = camel_to_snake(clsname)
    return guessed

62 

63 

64def _from_mock_url(url: str) -> str: 

65 """ 

66 Pytest only: when starting a url with mock:// it is expected to just be json afterwards. 

67 """ 

68 return url.removeprefix("mock://") 

69 

70 

def guess_filetype_for_url(url: str, response: "requests.Response | None" = None) -> str:
    """
    Based on the url (which may have an extension) and the requests response \
    (which may have a content-type), try to guess the right filetype (-> loader, e.g. json or yaml).

    Only the path of the url is inspected for an extension: the query string, the fragment and
    the scheme + host are ignored, so 'https://example.com' is not mistaken for a '.com' file.

    Args:
        url: the location the data was (or will be) loaded from.
        response: optional response for that url; its content-type header is used when the url has no extension.

    Returns:
        The lowercased extension without dot, else the lowercased content-type subtype
        (unless that is "plain"), else "json".
    """
    # strip '?query' and '#fragment': neither is part of the file name.
    path = url.split("?", 1)[0].split("#", 1)[0]

    # strip 'scheme://host' as well, otherwise the top level domain looks like an extension.
    _, has_scheme, after_scheme = path.partition("://")
    if has_scheme:
        path = after_scheme.partition("/")[2]

    if url_extension := os.path.splitext(path)[1].lower():
        return url_extension.strip(".")

    if response and (content_type_header := response.headers.get("content-type", "").split(";")[0].strip()):
        # media types are case-insensitive; normalize just like the extension above.
        content_type = content_type_header.split("/")[-1].lower()
        if content_type != "plain":
            return content_type

    # If both methods fail, default to JSON
    return "json"

89 

90 

def from_url(url: str, _dummy: bool = False) -> tuple[io.BytesIO, str]:
    """
    Load data as bytes into a file-like object and return the file type.

    This can be used by _load_data:
    > loader = loaders.get(filetype)
    > # dev/null exists but always returns b''
    > data = loader(contents, Path("/dev/null"))

    Args:
        url: http(s) url to download, or a mock:// url of which the remainder is used as the contents.
        _dummy: skip the request and use "{}" as contents (not looked at for mock:// urls).

    Returns:
        A tuple of the encoded contents wrapped in a BytesIO, and the guessed file type.

    Raises:
        requests.HTTPError: when the server answers with a 4xx or 5xx status code.
    """
    if url.startswith("mock://"):
        data = _from_mock_url(url)
        resp = None
    elif _dummy:
        resp = None
        data = "{}"
    else:
        resp = requests.get(url, timeout=10)
        # The body of an error page (404, 500, ...) is not a config file:
        # fail here instead of handing it to a loader as if it were the requested data.
        resp.raise_for_status()
        data = resp.text

    filetype = guess_filetype_for_url(url, resp)
    return io.BytesIO(data.encode()), filetype

112 

113 

def _load_data(
    data: T_data,
    key: str = None,
    classname: str = None,
    lower_keys: bool = False,
    allow_types: tuple[type, ...] = (dict,),
) -> dict[str, typing.Any]:
    """
    Tries to load the right data from a filename/path, url or dict, based on a manual key or a classname.

    E.g. class Tool will be mapped to key tool.
    It also deals with nested keys (tool.extra -> {"tool": {"extra": ...}}

    - bytes are returned untouched;
    - a list of sources is loaded one by one via `load_data` and merged (later sources win);
    - a string is either a (http, https or mock) url or a path to a file;
    - a Path is opened and parsed by the loader belonging to its suffix (or its name if it has no suffix).

    Raises:
        ValueError: on an empty list, when nothing is found for the key, or when the result is not one of `allow_types`.
    """
    if isinstance(data, bytes):
        # bytes in -> the very same bytes out.
        # `T_data` is re-used so this is hard to express for mypy.
        return data  # type: ignore

    if isinstance(data, list):
        if not data:
            raise ValueError("Empty list passed!")

        merged: dict[str, typing.Any] = {}
        for source in data:
            merged |= load_data(source, key=key, classname=classname, lower_keys=True, allow_types=allow_types)

        return merged

    loaded: typing.Any = data

    if isinstance(loaded, str):
        is_url = loaded.startswith(("http://", "https://", "mock://"))
        if is_url:
            contents, filetype = from_url(loaded)
            parse = loaders.get(filetype)
            # dev/null exists but always returns b''
            loaded = parse(contents, Path("/dev/null"))
        else:
            loaded = Path(loaded)

    if isinstance(loaded, Path):
        with loaded.open("rb") as handle:
            parse = loaders.get(loaded.suffix or loaded.name)
            loaded = parse(handle, loaded.resolve())

    if not loaded:
        return {}

    lookup = key
    if lookup is None:
        # no key given: use the only available one, or else derive it from the class name
        if len(loaded) == 1:
            lookup = list(loaded.keys())[0]
        elif classname is not None:
            lookup = _guess_key(classname)

    if lookup:
        loaded = _data_for_nested_key(lookup, loaded)

    if not loaded:
        raise ValueError("No data found!")

    if not isinstance(loaded, allow_types):
        raise ValueError(f"Data should be one of {allow_types} but it is {type(loaded)}!")

    if lower_keys and isinstance(loaded, dict):
        lowered = {}
        for name, value in loaded.items():
            lowered[name.lower()] = value
        loaded = lowered

    return typing.cast(dict[str, typing.Any], loaded)

181 

182 

def load_data(
    data: T_data,
    key: str = None,
    classname: str = None,
    lower_keys: bool = False,
    allow_types: tuple[type, ...] = (dict,),
) -> dict[str, typing.Any]:
    """
    Wrapper around `_load_data` that retries with key="" if anything goes wrong.

    When `data` is None, the result of `find_pyproject_toml()` is used as the source.
    """
    # no source given: try to load pyproject.toml
    source = find_pyproject_toml() if data is None else data
    options = {"lower_keys": lower_keys, "allow_types": allow_types}

    try:
        return _load_data(source, key, classname, **options)
    except Exception as e:
        if key == "":  # pragma: no cover
            # key already was "", so there is nothing left to retry with.
            # (will probably not happen but fallback)
            warnings.warn(f"Data could not be loaded: {e}", source=e)
            return {}

        return _load_data(source, "", classname, **options)

207 

208 

F = typing.TypeVar("F")


def convert_between(from_value: F, from_type: typing.Type[F], to_type: type[T]) -> T:
    """
    Convert a value from one type to another.

    A converter registered in `CONVERTERS` for the (from_type, to_type) pair takes precedence;
    without one, `to_type` itself is called with the value.
    """
    converter = CONVERTERS.get((from_type, to_type))
    if not converter:
        # default: just convert type:
        return to_type(from_value)  # type: ignore

    return typing.cast(T, converter(from_value))

221 

222 

def ensure_types(
    data: dict[str, T], annotations: dict[str, type[T]], convert_types: bool = False
) -> dict[str, T | None]:
    """
    Make sure all values in 'data' are in line with the types stored in 'annotations'.

    Only annotated keys end up in the result. A key without a value in 'data' is skipped with a warning
    and a key holding the `Postponed` marker is skipped silently.

    With `convert_types`, a value of the wrong type is converted via `convert_between`
    (raising ConfigErrorCouldNotConvert if that fails); without it, ConfigErrorInvalidType is raised.

    TODO: python 3.11 exception groups to throw multiple errors at once!
    """
    # unique marker instead of None, since None can be a legitimate (typing.Optional) value.
    # cast to T to make mypy happy
    missing = typing.cast(T, object())
    postponed = Postponed()

    checked: dict[str, T | None] = {}
    for key, expected in annotations.items():
        value = data.get(key, missing)
        if value is missing:  # pragma: nocover
            warnings.warn("This should not happen since `load_recursive` already fills `data` based on `annotations`")
            # skip!
            continue

        if value is postponed:
            # leave postponed items alone!
            continue

        if check_type(value, expected):
            checked[key] = value
            continue

        if not convert_types:
            raise ConfigErrorInvalidType(key, value=value, expected_type=expected)

        try:
            checked[key] = convert_between(value, type(value), expected)
        except (TypeError, ValueError) as e:
            raise ConfigErrorCouldNotConvert(type(value), expected, value) from e

    return checked

264 

265 

def convert_config(items: dict[str, T]) -> dict[str, T]:
    """
    Normalizes the config dict (from toml) or 'overwrites' dict in two ways.

    1. items with the value None are dropped, since in that case the default should be used;
    2. '-' and '.' in keys become '_' so the keys can be mapped to the Config properties.
    """
    separators = str.maketrans("-.", "__")

    cleaned = {}
    for name, value in items.items():
        if value is None:
            continue

        cleaned[name.translate(separators)] = value

    return cleaned

274 

275 

Type = typing.Type[typing.Any]
T_Type = typing.TypeVar("T_Type", bound=Type)


def load_recursive(
    cls: Type, data: dict[str, T], annotations: dict[str, Type], convert_types: bool = False
) -> dict[str, T]:
    """
    Resolve the tree of annotations: every annotated property gets its (possibly nested) value or a default.

    Nested custom classes are loaded through `_load_into_recurse`, this function does not call itself directly.

    Per annotated key:
    - key present in `data`: custom classes (plain, inside list[...], as value of dict[..., ...] or
      as member of a typing.Union) are loaded from the raw value; anything else is kept as is;
    - key absent: the class attribute, None (for optional types) or a dataclass default factory is used;
    - otherwise ConfigErrorMissingKey is raised.

    Example:
        class First:
            key: str

        class Second:
            other: First

        # step 1
        cls = Second
        data = {"second": {"other": {"key": "anything"}}}
        annotations: {"other": First}

        # step 1.5
        data = {"other": {"key": "anything"}
        annotations: {"other": First}

        # step 2
        cls = First
        data = {"key": "anything"}
        annotations: {"key": str}


    TODO: python 3.11 exception groups to throw multiple errors at once!
    """
    resolved = {}

    for name, annotation in annotations.items():
        if name not in data:
            # nothing configured for this property, so look for a fallback.
            if name in cls.__dict__:
                # property has default, use that instead.
                resolved[name] = cls.__dict__[name]
            elif is_optional(annotation):
                # type is optional and not found in __dict__ -> default is None
                resolved[name] = None
            elif (
                dc.is_dataclass(cls)
                and (field := dataclass_field(cls, name))
                and field.default_factory is not dc.MISSING
            ):
                # could have a default factory
                # todo: do something with field.default?
                resolved[name] = field.default_factory()
            else:
                raise ConfigErrorMissingKey(name, cls, annotation)

            continue

        current: typing.Any = data[name]  # may be replaced below, hence Any instead of T

        if not is_parameterized(annotation):
            if is_custom_class(annotation):
                # the cast only exists to tell mypy and pycharm that the annotation is a custom class;
                # the annotation itself is passed along as first argument.
                current = _load_into_recurse(
                    typing.cast(Type_C[typing.Any], annotation),
                    current,
                    convert_types=convert_types,
                )

            resolved[name] = current
            continue

        origin = typing.get_origin(annotation)
        arguments = typing.get_args(annotation)

        if origin is list and arguments and is_custom_class(arguments[0]):
            item_type = arguments[0]
            converted = []
            for item in current:
                converted.append(_load_into_recurse(item_type, item, convert_types=convert_types))
            current = converted
        elif origin is dict and arguments and is_custom_class(arguments[1]):
            # e.g. dict[str, Point]
            # the key type is not a custom class, so keys are kept untouched:
            _, item_type = arguments
            current = {
                item_key: _load_into_recurse(item_type, item, convert_types=convert_types)
                for item_key, item in current.items()
            }
        elif origin is typing.Union and arguments:
            # other dicts and non-custom union members keep their data the same
            for candidate in arguments:
                if is_custom_class(candidate):
                    current = _load_into_recurse(candidate, current, convert_types=convert_types)

        # todo: other parameterized/unions/typing.Optional

        resolved[name] = current

    return resolved

371 

372 

def check_and_convert_data(
    cls: typing.Type[C],
    data: dict[str, typing.Any],
    _except: typing.Iterable[str],
    strict: bool = True,
    convert_types: bool = False,
) -> dict[str, typing.Any]:
    """
    Based on class annotations, this prepares the data for `load_into_recurse`.

    1. the annotations of `cls` are collected with `all_annotations`, passing `_except` along;
    2. config-keys are converted to python compatible config_keys (`convert_config`);
    3. custom class type annotations are loaded with the same logic (see also `load_recursive`);
    4. only if `strict`: `ensure_types` checks the loaded values against the annotated types.
    """
    hints = all_annotations(cls, _except=_except)

    prepared = load_recursive(cls, convert_config(data), hints, convert_types=convert_types)
    if not strict:
        return prepared

    return ensure_types(prepared, hints, convert_types=convert_types)

395 

396 

397T_init_list = list[typing.Any] 

398T_init_dict = dict[str, typing.Any] 

399T_init = tuple[T_init_list, T_init_dict] | T_init_list | T_init_dict | None 

400 

401 

402@typing.no_type_check # (mypy doesn't understand 'match' fully yet) 

403def _split_init(init: T_init) -> tuple[T_init_list, T_init_dict]: 

404 """ 

405 Accept a tuple, a dict or a list of (arg, kwarg), {kwargs: ...}, [args] respectively and turn them all into a tuple. 

406 """ 

407 if not init: 

408 return [], {} 

409 

410 args: T_init_list = [] 

411 kwargs: T_init_dict = {} 

412 match init: 

413 case (args, kwargs): 

414 return args, kwargs 

415 case [*args]: 

416 return args, {} 

417 case {**kwargs}: 

418 return [], kwargs 

419 case _: 

420 raise ValueError("Init must be either a tuple of list and dict, a list or a dict.") 

421 

422 

def _load_into_recurse(
    cls: typing.Type[C],
    data: dict[str, typing.Any] | bytes,
    init: T_init = None,
    strict: bool = True,
    convert_types: bool = False,
) -> C:
    """
    Create an instance of `cls` and fill it with `data`.

    Uses `load_recursive` (via `check_and_convert_data`) to load any fillable annotated properties, \
    see that method for an example.
    `init` can be used to optionally pass extra __init__ arguments. \
        NOTE: This will overwrite a config key with the same name!

    Three kinds of classes are handled:
    - BinaryConfig (or bytes as data): parsing is left to `cls._parse_into`;
    - dataclasses: the loaded data is passed to the constructor, `init` is not allowed;
    - other classes: constructed with `init` first, after which the loaded data is written to the instance.
    """
    positional, named = _split_init(init)

    if isinstance(data, bytes) or issubclass(cls, BinaryConfig):
        if not isinstance(data, (bytes, dict)):  # pragma: no cover
            raise NotImplementedError("BinaryConfig can only deal with `bytes` or a dict of bytes as input.")
        if not issubclass(cls, BinaryConfig):  # pragma: no cover
            raise NotImplementedError("Only BinaryConfig can be used with `bytes` (or a dict of bytes) as input.")

        return typing.cast(C, cls._parse_into(data))

    checks = {"strict": strict, "convert_types": convert_types}

    if dc.is_dataclass(cls):
        fields = check_and_convert_data(cls, data, named.keys(), **checks)
        if init:
            raise ValueError("Init is not allowed for dataclasses!")

        # the cast tells mypy the result is an instance of cls (and not a fictitious `DataclassInstance`)
        return typing.cast(C, cls(**fields))

    instance = cls(*positional, **named)
    # whatever __init__ already set on the instance is excluded from loading
    fields = check_and_convert_data(cls, data, instance.__dict__.keys(), **checks)
    instance.__dict__.update(**fields)
    return instance

459 

460 

def _load_into_instance(
    inst: C,
    cls: typing.Type[C],
    data: dict[str, typing.Any],
    init: T_init = None,
    strict: bool = True,
    convert_types: bool = False,
) -> C:
    """
    Similar to `load_into_recurse` but fills an existing instance of a class (so after __init__) \
    and thus does not support init.

    Attributes that already exist on the instance are excluded from loading.
    The same instance is returned after its __dict__ has been updated.
    """
    if init is not None:
        raise ValueError("Can not init an existing instance!")

    state = inst.__dict__
    loaded = check_and_convert_data(cls, data, _except=state.keys(), strict=strict, convert_types=convert_types)
    state.update(**loaded)

    return inst

486 

487 

def load_into_class(
    cls: typing.Type[C],
    data: T_data,
    /,
    key: str = None,
    init: T_init = None,
    strict: bool = True,
    lower_keys: bool = False,
    convert_types: bool = False,
) -> C:
    """
    Shortcut for load_data + _load_into_recurse.

    Subclasses of BinaryConfig may load bytes in addition to a dict; other classes only accept a dict.
    """
    accepted: tuple[type, ...] = (dict,)
    if issubclass(cls, BinaryConfig):
        accepted = (dict, bytes)

    raw = load_data(data, key, cls.__name__, lower_keys=lower_keys, allow_types=accepted)
    return _load_into_recurse(cls, raw, init=init, strict=strict, convert_types=convert_types)

504 

505 

def load_into_instance(
    inst: C,
    data: T_data,
    /,
    key: str = None,
    init: T_init = None,
    strict: bool = True,
    lower_keys: bool = False,
    convert_types: bool = False,
) -> C:
    """
    Shortcut for load_data + _load_into_instance.

    The class of `inst` determines the default key and whether bytes are accepted (BinaryConfig only).
    """
    cls = inst.__class__

    accepted: tuple[type, ...] = (dict,)
    if issubclass(cls, BinaryConfig):
        accepted = (dict, bytes)

    raw = load_data(data, key, cls.__name__, lower_keys=lower_keys, allow_types=accepted)
    return _load_into_instance(inst, cls, raw, init=init, strict=strict, convert_types=convert_types)

523 

524 

def load_into(
    cls: typing.Type[C],
    data: T_data = None,
    /,
    key: str = None,
    init: T_init = None,
    strict: bool = True,
    lower_keys: bool = False,
    convert_types: bool = False,
) -> C:
    """
    Load your config into a class (instance).

    Supports both a class or an instance as first argument, but that's hard to explain to mypy, so officially only
    classes are supported, and if you want to `load_into` an instance, you should use `load_into_instance`.

    Args:
        cls: either a class or an existing instance of that class.
        data: can be a dictionary or a path to a file to load (as pathlib.Path or str)
        key: optional (nested) dictionary key to load data from (e.g. 'tool.su6.specific')
        init: optional data to pass to your cls' __init__ method (only if cls is not an instance already)
        strict: enable type checks or allow anything?
        lower_keys: should the config keys be lowercased? (for .env)
        convert_types: should the types be converted to the annotated type if not yet matching? (for .env)

    """
    # an instance is not supported according to mypy, but load_into(instance) still works:
    # anything that is not a class is handed to `load_into_instance`.
    loader = load_into_class if isinstance(cls, type) else load_into_instance

    return loader(cls, data, key=key, init=init, strict=strict, lower_keys=lower_keys, convert_types=convert_types)