Source code for scitex_io._utils

#!/usr/bin/env python3
"""
Inline utilities to avoid external dependencies.
All utilities needed by scitex-io that would otherwise come from scitex.
"""

import os
from pathlib import Path


# String utilities
def clean_path(path_string):
    """Return ``path_string`` as a normalized path string.

    The argument is converted with ``str()`` first (so ``Path`` objects are
    accepted) and then passed through ``os.path.normpath``, which collapses
    redundant separators and ``.`` / ``..`` components.
    """
    as_text = str(path_string)
    normalized = os.path.normpath(as_text)
    return normalized


def color_text(text, color):
    """Wrap ``text`` in an ANSI colour code when ``colorama`` is available.

    ``color`` is one of ``"green"``, ``"red"``, ``"yellow"``, ``"blue"``,
    ``"magenta"`` or ``"cyan"``; any other value adds no colour prefix but
    the reset suffix is still appended. If ``colorama`` cannot be imported
    the input is returned untouched.
    """
    try:
        from colorama import Fore, Style
    except ImportError:
        # Optional dependency missing: degrade to plain text.
        return text

    supported = ("green", "red", "yellow", "blue", "magenta", "cyan")
    palette = {name: getattr(Fore, name.upper()) for name in supported}
    prefix = palette.get(color, "")
    return f"{prefix}{text}{Style.RESET_ALL}"


def readable_bytes(size):
    """Format a byte count as a human-readable string.

    The value is divided by 1024 until its magnitude drops below 1024, and
    rendered with two decimals followed by the matching unit
    (``B``, ``KB``, ``MB``, ``GB``, ``TB``; anything larger is reported in
    ``PB``).

    Negative values (e.g. a size delta) are scaled by magnitude and keep
    their sign, so ``-2048`` becomes ``"-2.00 KB"``.
    """
    for unit in ("B", "KB", "MB", "GB", "TB"):
        # Compare on magnitude: a plain ``size < 1024`` is always true for
        # negative numbers and would stop the scaling at "B".
        if abs(size) < 1024.0:
            return f"{size:.2f} {unit}"
        size /= 1024.0
    return f"{size:.2f} PB"


# Dict utilities
class DotDict:
    """Dictionary-like object with attribute access and item access.

    Attribute-style access works for keys that are valid identifiers;
    standard item access works for all keys (including integers, etc.).

    Case-insensitive on string-key lookup, storage-stable
    -----------------------------------------------------
    Keys are stored exactly as set (``load_configs`` separately normalises
    every config key to UPPER on load). Lookups, however, are
    **case-insensitive for string keys**: ``d["seizure"]``,
    ``d["SEIZURE"]``, ``d.seizure`` and ``d.SEIZURE`` all resolve to the
    same stored value regardless of the stored case, and ``"seizure" in d``
    matches a stored ``"SEIZURE"`` (and vice versa).

    This means a config written ``STR2COLOR: {"seizure": "red"}`` — which
    ``load_configs`` stores as ``{"SEIZURE": "red"}`` — can still be looked
    up with the lowercase key the user wrote
    (``CONFIG.X.STR2COLOR["seizure"]``) without a surprise ``KeyError``.

    Operations that first have to *find* an existing entry — ``get``,
    ``pop``, ``setdefault``, ``del d[key]`` and ``del d.key`` — use the same
    case-insensitive resolution, so they always agree with ``key in d``.
    Plain assignment (``d[key] = v`` / ``d.key = v``) stores under the key
    exactly as given.

    ``keys()`` / ``values()`` / ``items()`` / iteration return the stored
    (canonical) form — they are NOT case-folded. Non-string keys (ints,
    etc.) are left untouched and matched exactly.
    """

    def __init__(self, dictionary=None):
        """Build from an optional ``dict`` or ``DotDict``.

        Nested plain dicts are converted to ``DotDict`` recursively.

        Raises:
            TypeError: if ``dictionary`` is neither ``None``, a ``dict`` nor
                a ``DotDict``.
        """
        # Bypass our own __setattr__, which would route into ``_data``.
        super().__setattr__("_data", {})
        if dictionary is None:
            return
        if isinstance(dictionary, DotDict):
            dictionary = dictionary._data
        elif not isinstance(dictionary, dict):
            raise TypeError("Input must be a dictionary.")
        for key, value in dictionary.items():
            # __setitem__ wraps nested plain dicts into DotDict.
            self[key] = value

    def _resolve_key(self, key):
        """Return the stored key matching ``key`` case-insensitively.

        Resolution order, designed so the common (UPPER-stored) path stays
        O(1) and the case-insensitive scan runs only on a genuine miss:

        1. Exact match — covers non-string keys and same-case lookups.
        2. For string keys, ``key.upper()`` — covers lowercase lookup of an
           UPPER-stored key (the ``load_configs`` case).
        3. For string keys, a case-insensitive scan over stored string keys
           — covers any other case mix (e.g. lowercase storage).

        Raises ``KeyError`` (carrying the *original* lookup key) when nothing
        matches, so callers see the key they actually asked for.
        """
        data = self._data
        if key in data:
            return key
        if isinstance(key, str):
            upper = key.upper()
            if upper in data:
                return upper
            for stored in data:
                if isinstance(stored, str) and stored.upper() == upper:
                    return stored
        raise KeyError(key)

    def __getattr__(self, key):
        """Attribute read; only reached when normal lookup fails."""
        if key.startswith("_"):
            # Private names are never served from ``_data``; let the default
            # machinery raise AttributeError.
            return super().__getattribute__(key)
        try:
            return self._data[self._resolve_key(key)]
        except KeyError:
            raise AttributeError(
                f"'{type(self).__name__}' object has no attribute '{key}'"
            ) from None

    def __setattr__(self, key, value):
        """Attribute write; public names are stored in ``_data`` as given."""
        if key == "_data" or key.startswith("_"):
            super().__setattr__(key, value)
        else:
            if isinstance(value, dict) and not isinstance(value, DotDict):
                value = DotDict(value)
            self._data[key] = value

    def __delattr__(self, key):
        """Attribute delete; public names resolve case-insensitively."""
        if key.startswith("_"):
            super().__delattr__(key)
            return
        try:
            del self._data[self._resolve_key(key)]
        except KeyError:
            raise AttributeError(
                f"'{type(self).__name__}' object has no attribute '{key}'"
            ) from None

    def __getitem__(self, key):
        return self._data[self._resolve_key(key)]

    def __setitem__(self, key, value):
        """Store ``value`` under ``key`` exactly as given (no case folding)."""
        if isinstance(value, dict) and not isinstance(value, DotDict):
            value = DotDict(value)
        self._data[key] = value

    def __delitem__(self, key):
        """Delete the entry matching ``key``; raises ``KeyError`` on a miss."""
        del self._data[self._resolve_key(key)]

    def get(self, key, default=None):
        """Return the value for ``key`` or ``default`` when it is absent."""
        # Case-insensitive for string keys, mirroring __getitem__, so
        # d.get("seizure") and d["seizure"] never disagree.
        try:
            return self._data[self._resolve_key(key)]
        except KeyError:
            return default

    def to_dict(self, include_private=False):
        """Recursively convert to plain dict.

        String keys starting with ``_`` are skipped unless
        ``include_private`` is true.
        """
        result = {}
        for key, value in self._data.items():
            if not include_private and isinstance(key, str) and key.startswith("_"):
                continue
            if isinstance(value, DotDict):
                value = value.to_dict(include_private=include_private)
            result[key] = value
        return result

    def __str__(self):
        """Render as indented JSON, falling back to a short summary."""
        import json as _json

        def default_handler(obj):
            if isinstance(obj, DotDict):
                return obj.to_dict()
            try:
                _json.dumps(obj)
                return obj
            except (TypeError, OverflowError):
                return str(obj)

        try:
            return _json.dumps(self.to_dict(), indent=4, default=default_handler)
        except TypeError as e:
            return f"<DotDict at {hex(id(self))}, keys: {list(self._data.keys())}> Error: {e}"

    def __repr__(self):
        import pprint as _pprint

        return _pprint.pformat(self.to_dict(include_private=False), indent=2, width=80)

    def __len__(self):
        return len(self._data)

    def keys(self):
        """Return a view of the stored keys (not case-folded)."""
        return self._data.keys()

    def values(self):
        """Return a view of the stored values."""
        return self._data.values()

    def items(self):
        """Return a view of the stored ``(key, value)`` pairs."""
        return self._data.items()

    def update(self, dictionary):
        """Merge entries from a ``dict``, a ``DotDict`` or an iterable of pairs.

        Raises:
            TypeError: if ``dictionary`` is none of the accepted kinds.
        """
        if isinstance(dictionary, (dict, DotDict)):
            # A DotDict iterates over its keys, so it must go through
            # ``items()`` rather than the "iterable of pairs" branch.
            iterator = dictionary.items()
        elif hasattr(dictionary, "__iter__"):
            iterator = dictionary
        else:
            raise TypeError(
                "Input must be a dictionary or an iterable of key-value pairs."
            )
        for key, value in iterator:
            self[key] = value

    def setdefault(self, key, default=None):
        """Return the value for ``key``, inserting ``default`` if absent.

        Presence is decided with the same case-insensitive rule as
        ``key in d``. On insertion the *stored* value is returned, so a
        plain-dict default comes back as the ``DotDict`` that was actually
        stored and later mutations of it are visible through ``self``.
        """
        try:
            stored = self._resolve_key(key)
        except KeyError:
            self[key] = default
            return self._data[key]
        return self._data[stored]

    def pop(self, key, *args):
        """Remove ``key`` and return its value.

        An optional single extra argument is returned instead of raising
        ``KeyError`` when the key is absent.

        Raises:
            TypeError: if more than one extra argument is given.
            KeyError: if the key is absent and no default was supplied.
        """
        if len(args) > 1:
            raise TypeError(f"pop expected at most 2 arguments, got {1 + len(args)}")
        try:
            stored = self._resolve_key(key)
        except KeyError:
            if args:
                return args[0]
            raise
        return self._data.pop(stored)

    def __contains__(self, key):
        try:
            self._resolve_key(key)
            return True
        except KeyError:
            return False

    def __iter__(self):
        return iter(self._data)

    def copy(self):
        """Return a shallow copy (nested ``DotDict`` values are shared)."""
        return DotDict(self._data.copy())

    def __eq__(self, other):
        if isinstance(other, DotDict):
            return self._data == other._data
        elif isinstance(other, dict):
            return self._data == other
        return False

    def __ne__(self, other):
        return not self.__eq__(other)

    def __bool__(self):
        return len(self._data) > 0
# Decorator utilities
def preserve_doc(func):
    """Placeholder for preserve_doc decorator; returns ``func`` unchanged."""
    return func


# Path utilities
def split(path):
    """Split path into components (the ``parts`` tuple of ``Path(path)``)."""
    return Path(path).parts


def this_path():
    """Get current file path.

    Returns the ``co_filename`` of the code object of the frame that called
    this function.
    """
    import inspect

    frame = inspect.currentframe().f_back
    return frame.f_code.co_filename


def clean(path):
    """Clean path: return ``Path(path).resolve()`` as a string."""
    return str(Path(path).resolve())


def getsize(path):
    """Get file size in bytes, or ``0`` when the path does not exist."""
    return Path(path).stat().st_size if Path(path).exists() else 0


# String parsing
def parse(string, pattern=None):
    """Extract `{name}` placeholders from a path template into a dict.

    The pattern uses `{name}` for named captures (each capturing a
    non-`/` segment) and `*` as a non-greedy wildcard. The whole string
    must match the pattern.

    Returns `{}` on no match. Returns the input string unchanged when no
    pattern is supplied. Captured values that are plain integers (optionally
    with one leading ``-``) are converted to ``int``; everything else stays
    a string.
    """
    if pattern is None:
        return string
    import re

    # Escape the template first, then re-open the two constructs we support.
    regex = re.escape(pattern)
    regex = re.sub(r"\\{(\w+)\\}", r"(?P<\1>[^/]+?)", regex)
    regex = regex.replace(r"\*", ".*?")
    m = re.fullmatch(regex, string)
    if not m:
        return {}

    def _coerce(v: str):
        # Numeric strings (incl. zero-padded "001") become ints; non-numeric
        # stays as a string. Tests rely on this coercion for IDs / indices.
        # Only one leading "-" is accepted and the rest must be decimal
        # digits: "--5" or "²" pass a ``lstrip("-").isdigit()`` check but
        # make ``int()`` raise, so they are kept as strings instead.
        digits = v[1:] if v.startswith("-") else v
        if digits.isdecimal():
            return int(v)
        return v

    return {k: _coerce(v) for k, v in m.groupdict().items()}


# Environment detection
def detect_environment():
    """Detect execution environment.

    Returns ``"jupyter"`` when a ``get_ipython`` name is available,
    otherwise ``"python"``.
    """
    try:
        get_ipython()  # type: ignore
        return "jupyter"
    except NameError:
        return "python"


def get_notebook_info_simple():
    """Return ``(notebook_filename, notebook_directory)`` or ``(None, None)``.

    Used by ``scitex_io.save`` to route notebook artefacts to the canonical
    ``<notebook_dir>/<stem>_out/<file>`` location.

    Detection layers (first truthy hit wins):

    1. Explicit env-var override ``SCITEX_NOTEBOOK_PATH`` — set by CI /
       nbconvert wrappers when the notebook path can't be discovered from
       inside the kernel.
    2. VS Code Jupyter — ``__vsc_ipynb_file__`` injected into the user
       namespace by the VS Code Jupyter extension.
    3. JupyterLab / classic notebook — ``__session__`` global, plus
       ``ipynbname.path()`` if the optional ``ipynbname`` package is
       installed (it queries the running Jupyter server).
    4. Fallback to scanning ``sys.argv`` for a ``*.ipynb`` arg (handles
       ``jupyter nbconvert demo.ipynb`` invocations from tools that forward
       argv to the kernel).

    Returns a 2-tuple, never a dict (callers unpack it).
    """
    import os
    import sys

    # 1) Explicit override.
    explicit = os.environ.get("SCITEX_NOTEBOOK_PATH")
    if explicit and os.path.exists(explicit):
        path = os.path.abspath(explicit)
        return os.path.basename(path), os.path.dirname(path) or None

    # 2/3) IPython user namespace (VS Code, JupyterLab).
    try:
        ip = get_ipython()  # type: ignore[name-defined]
    except NameError:
        ip = None
    if ip is not None:
        ns = getattr(ip, "user_ns", {}) or {}
        for key in ("__vsc_ipynb_file__", "__session__", "__notebook__"):
            candidate = ns.get(key)
            if isinstance(candidate, str) and candidate.endswith(".ipynb"):
                if os.path.exists(candidate):
                    path = os.path.abspath(candidate)
                    return os.path.basename(path), os.path.dirname(path) or None
        # ipynbname (best-effort) — only available if the user opted in.
        try:
            import ipynbname  # type: ignore

            path = str(ipynbname.path())
            if path.endswith(".ipynb") and os.path.exists(path):
                return os.path.basename(path), os.path.dirname(path) or None
        except Exception:
            # Deliberately broad: a missing package or any failure while
            # querying the server just means this layer found nothing.
            pass

    # 4) sys.argv last-ditch (nbconvert running outside a kernel sometimes
    # passes the path positionally).
    for arg in sys.argv:
        if isinstance(arg, str) and arg.endswith(".ipynb") and os.path.exists(arg):
            path = os.path.abspath(arg)
            return os.path.basename(path), os.path.dirname(path) or None

    return None, None