# Source code for scitex_io._loading._load_configs

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Timestamp: "2025-10-11 23:54:07 (ywatanabe)"
# File: /home/ywatanabe/proj/scitex_repo/src/scitex/io/_load_configs.py
# ----------------------------------------
from __future__ import annotations

import os

# Hard-coded, repository-relative path recorded for this module; it is a
# literal string, not derived from the runtime ``__file__``.
# NOTE(review): the literal names ``scitex/io`` whereas the module is
# published as ``scitex_io._loading._load_configs`` — confirm it is current.
__FILE__ = "./src/scitex/io/_load_configs.py"
# Directory component of ``__FILE__`` (also a relative path).
__DIR__ = os.path.dirname(__FILE__)
# ----------------------------------------

from pathlib import Path
from typing import Optional, Union

from .._glob import glob
from .._utils import DotDict
from ._load import load


class ConfigLoadError(Exception):
    """Signal that :func:`load_configs` could not read or process a YAML file.

    Why it exists
    -------------
    ``load_configs`` used to wrap its whole body in a ``try/except`` that
    swallowed every failure and handed back an empty ``DotDict({})``. A
    malformed YAML file, an int-keyed mapping that crashed the
    debug-promotion walker, or a missing file under ``categories/`` all
    looked identical to the caller: a puzzling
    ``'DotDict' object has no attribute 'PAC'`` raised three frames away
    from the real problem, while the underlying ``yaml.YAMLError`` /
    ``AttributeError`` was merely printed to stderr (and frequently lost
    in CI logs or pytest capture).

    What it carries
    ---------------
    The message identifies the YAML file that failed, and the original
    exception is attached as ``__cause__`` so the root error remains in
    the traceback.

    Handling
    --------
    Only catch this when recovering from a broken config is genuinely
    intended; in every other case let it propagate.
    """


def _normalize_to_upper(d, file=None, path="CONFIG"):
    """Rename every string key of a nested config mapping to UPPER_CASE.

    The mapping is rewritten in place, level by level: each string key is
    replaced by its ``str.upper()`` form and every value that is itself a
    ``dict`` / ``DotDict`` is processed recursively. Keys that are not
    strings (ints, etc.) are kept exactly as they are. Values that are not
    mappings are not inspected.

    Collisions are fatal
    --------------------
    When two string keys of the *same* mapping share one UPPER form (for
    example ``"seizure"`` and ``"SEIZURE"``, or the stems of ``MODEL.yaml``
    and ``model.yaml`` at the top level), a :class:`ValueError` is raised
    immediately. Its message names the source file (or the config
    directory when the colliding keys are filename stems), the dotted
    mapping path, and the first two offending keys. Nothing is merged or
    dropped to paper over the clash.

    Parameters
    ----------
    d : dict | DotDict
        Mapping to rewrite in place. Any other object is returned
        untouched.
    file : str | None
        Stem of the YAML file the mapping came from, used only in error
        messages. ``None`` marks the top level, where the keys themselves
        are filename stems.
    path : str
        Dotted location of ``d`` inside the config tree, used only in
        error messages (e.g. ``CONFIG.SEIZURE.STR2COLOR``).

    Returns
    -------
    object
        ``d`` itself (the same object that was passed in).

    Raises
    ------
    ValueError
        On a case collision between two string keys of one mapping.
    """
    mapping_types = (dict, DotDict)
    if not isinstance(d, mapping_types):
        return d

    # Bucket the string keys by their folded spelling, in insertion order.
    spellings: dict[str, list[str]] = {}
    for key in list(d.keys()):
        if not isinstance(key, str):
            continue
        spellings.setdefault(key.upper(), []).append(key)

    # Remember how each folded key was originally written so that children
    # of a top-level entry are reported under the stem the author used
    # (``m.yaml`` -> file 'm', not the folded 'M').
    written_as: dict[str, str] = {}
    for folded, group in spellings.items():
        if len(group) != 1:
            if file is None:
                where = "the config directory"
            else:
                where = f"file {file!r}"
            a, b = group[:2]
            raise ValueError(
                f"load_configs: case collision in {where} at mapping "
                f"{path!r}: keys {a!r} and {b!r} both normalise to "
                f"{upper!r}. Rename one of them so the loaded config has "
                f"unambiguous UPPER_CASE keys."
                if False
                else f"load_configs: case collision in {where} at mapping "
                f"{path!r}: keys {a!r} and {b!r} both normalise to "
                f"{folded!r}. Rename one of them so the loaded config has "
                f"unambiguous UPPER_CASE keys."
            )
        original = group[0]
        written_as[folded] = original
        if original != folded:
            # Re-insert under the folded name; the entry moves to the end.
            d[folded] = d.pop(original)

    # Descend into nested mappings using a snapshot of the renamed items.
    for key, child in list(d.items()):
        if not isinstance(child, mapping_types):
            continue
        if file is None:
            # Top level: the key is a filename stem, so it becomes the
            # source-file context for everything beneath it.
            source_file = written_as.get(key, key)
        else:
            source_file = file
        _normalize_to_upper(child, file=source_file, path=f"{path}.{key}")
    return d


def load_configs(
    IS_DEBUG=None,
    show=False,
    verbose=False,
    config_dir: Optional[Union[str, Path]] = None,
):
    """Load and merge every YAML under ``config_dir`` into one ``DotDict``.

    Filename stems become top-level keys; YAML keys become nested
    attributes. Files matching ``<config_dir>/*.yaml`` are read first,
    then ``<config_dir>/categories/*.yaml`` if that directory exists; a
    later file with the same stem replaces the earlier entry.

    Every string key (filename stem and every nested mapping key) is
    normalised to UPPER_CASE at load time so the in-memory tree is
    case-stable regardless of source casing — ``model.yaml`` with
    ``hidden_dim: 256`` lands at ``CONFIG.MODEL.HIDDEN_DIM``. Lookups on
    the returned ``DotDict`` are case-insensitive for string keys, so
    ``CONFIG.SEIZURE.STR2COLOR["seizure"]`` resolves the stored
    ``"SEIZURE"`` entry (non-string keys are matched exactly).

    If two keys inside one mapping fold to the same UPPER form (e.g.
    ``MODEL.yaml`` next to ``model.yaml``, or ``HIDDEN_DIM`` next to
    ``hidden_dim``), a loud ``ValueError`` is raised at load time naming
    the source file, the mapping path, and both offending keys. The
    collision is never silently merged or dropped.

    Debug mode promotes any ``DEBUG_<KEY>`` / ``debug_<KEY>`` sibling over
    its non-debug counterpart, independent of the order in which the two
    keys appear in the YAML. Debug mode is active when any of these holds:
    the ``IS_DEBUG`` argument is truthy, the ``CI`` environment variable
    equals exactly ``"True"``, or ``<config_dir>/IS_DEBUG.yaml`` exists
    and its ``IS_DEBUG`` entry is truthy.

    Parameters
    ----------
    IS_DEBUG : bool, optional
        Truthy forces debug mode. Any falsy value (``None`` or ``False``)
        defers to the ``CI`` environment variable and ``IS_DEBUG.yaml``.
    show : bool
        Echo the ``DEBUG_<KEY> -> <KEY>`` substitutions to stdout.
    verbose : bool
        Currently has the same effect as ``show``.
    config_dir : Union[str, Path], optional
        Directory containing the YAML files. Defaults to ``"./config"``.

    Returns
    -------
    DotDict
        Merged configuration tree with UPPER_CASE string keys throughout.

    Raises
    ------
    ValueError
        If two keys inside one mapping fold to the same UPPER form (a
        case collision). Any other ``ValueError`` raised while reading a
        file is also propagated unchanged.
    ConfigLoadError
        If reading or processing a YAML file fails with any other
        exception (malformed YAML, a crash in the debug walker, …). The
        message names the offending file path and the original exception
        is chained as ``__cause__``.

    Examples
    --------
    >>> CONFIG = load_configs()  # ./config/*.yaml
    >>> CONFIG.MODEL.HIDDEN_DIM  # 256
    >>> CONFIG = load_configs(IS_DEBUG=True)
    >>> CONFIG.MODEL.HIDDEN_DIM  # 32 (DEBUG_ promoted)
    """

    def apply_debug_values(config, IS_DEBUG):
        """Promote ``DEBUG_<key>`` entries over ``<key>`` in place.

        Returns ``config`` unchanged when debug mode is off or ``config``
        is not a mapping.
        """
        if not IS_DEBUG or not isinstance(config, (dict, DotDict)):
            return config

        # Pass 1: collect the debug-prefixed entries and recurse into the
        # ordinary nested mappings. Promotions are deferred to pass 2 so
        # that recursing into ``<key>`` can never overwrite a value that
        # ``DEBUG_<key>`` already promoted (which happened when the debug
        # key preceded its counterpart and the counterpart was a mapping).
        promotions = []
        for key, value in list(config.items()):
            # YAML mapping keys can be non-string (ints, etc.), on which
            # ``str.startswith`` would raise. Only string keys are
            # pattern-matched; non-string keys are still recursed into
            # when their value is a mapping.
            if isinstance(key, str) and key.startswith(("DEBUG_", "debug_")):
                promotions.append((key, value))
            elif isinstance(value, (dict, DotDict)):
                config[key] = apply_debug_values(value, IS_DEBUG)

        # Pass 2: apply the promotions in their original order.
        for key, value in promotions:
            dk_wo_debug_prefix = key.split("_", 1)[1]
            config[dk_wo_debug_prefix] = value
            if show or verbose:
                print(f"{key} -> {dk_wo_debug_prefix}")

        return config

    # Handle config directory parameter
    if config_dir is None:
        config_dir = "./config"
    elif isinstance(config_dir, Path):
        config_dir = str(config_dir)

    # Set debug mode. The IS_DEBUG.yaml read is wrapped so a broken
    # IS_DEBUG.yaml surfaces as ConfigLoadError naming that file.
    debug_config_path = f"{config_dir}/IS_DEBUG.yaml"
    try:
        IS_DEBUG = (
            IS_DEBUG
            or os.getenv("CI") == "True"
            or (
                os.path.exists(debug_config_path)
                and load(debug_config_path).get("IS_DEBUG")
            )
        )
    except (ConfigLoadError, ValueError):
        raise
    except Exception as e:
        raise ConfigLoadError(
            f"load_configs failed reading IS_DEBUG flag from "
            f"{debug_config_path!r}: {type(e).__name__}: {e}"
        ) from e

    # Load and merge configs (namespaced by filename)
    CONFIGS = {}

    def _ingest(lpath: str) -> None:
        """Load one YAML file into ``CONFIGS`` under its filename stem.

        A file whose loaded content is falsy (e.g. empty) is skipped.
        Failures are re-raised with the file path attached so a single
        bad file is identifiable from the error message.
        """
        try:
            if config := load(lpath):
                filename = Path(lpath).stem
                CONFIGS[filename] = apply_debug_values(config, IS_DEBUG)
        except (ConfigLoadError, ValueError):
            # ValueError and any nested ConfigLoadError pass through
            # unchanged; everything else is wrapped below with the file
            # path attached.
            raise
        except Exception as e:
            raise ConfigLoadError(
                f"load_configs failed processing {lpath!r}: "
                f"{type(e).__name__}: {e}"
            ) from e

    # Load from main config directory
    config_pattern = f"{config_dir}/*.yaml"
    for lpath in glob(config_pattern):
        _ingest(lpath)

    # Load from categories subdirectory if it exists
    categories_dir = f"{config_dir}/categories"
    if os.path.exists(categories_dir):
        categories_pattern = f"{categories_dir}/*.yaml"
        for lpath in glob(categories_pattern):
            _ingest(lpath)

    # Normalise every filename-level key (from YAML stem) and every
    # nested string key to UPPER_CASE so the loaded config is
    # case-stable regardless of source casing. A case collision
    # (e.g. MODEL.yaml + model.yaml, HIDDEN_DIM + hidden_dim) raises a
    # loud ValueError here, naming the file/path/keys.
    _normalize_to_upper(CONFIGS)

    return DotDict(CONFIGS)
# EOF