Source code for scitex_io._glob

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Time-stamp: "2024-11-25 00:31:08 (ywatanabe)"
# File: ./scitex_repo/src/scitex/io/_glob.py

# Hard-coded absolute path of this module's source file; nothing in the
# visible code reads it.
THIS_FILE = "/home/ywatanabe/proj/scitex_repo/src/scitex/io/_glob.py"

import re as _re
from glob import glob as _glob
from pathlib import Path
from typing import Union

from ._utils import parse as _parse

try:
    from natsort import natsorted as _natsorted
except ImportError:
    _natsorted = sorted


def glob(expression: Union[str, Path], parse=False, ensure_one=False):
    """Find paths matching a glob pattern and return them in sorted order.

    Thin wrapper around :func:`glob.glob` that sorts the matches (naturally,
    e.g. ``file2`` before ``file10``, when ``natsort`` is installed; with the
    built-in ``sorted`` otherwise) and accepts ``{...}`` placeholders in the
    pattern.

    Parameters
    ----------
    expression : str or Path
        The glob pattern. Standard glob syntax is supported. Every ``{...}``
        group is replaced by a single ``*`` wildcard before matching, so
        ``{id}`` and ``{a,b}`` both match *any* name at that position (this is
        not shell-style brace expansion). A pattern containing ``**`` is
        matched recursively.
    parse : bool, optional
        If True, additionally parse each matched path against ``expression``
        and return the parsed results. Default is False.
    ensure_one : bool, optional
        If True, require exactly one match. Default is False.

    Returns
    -------
    list or tuple
        If ``parse=False``: sorted list of matched paths.
        If ``parse=True``: tuple of ``(paths, parsed_results)``, where
        ``parsed_results`` holds one entry per matched path.

    Raises
    ------
    AssertionError
        If ``ensure_one`` is True and the number of matches is not exactly 1.

    Examples
    --------
    >>> glob('data/*.txt')
    ['data/file1.txt', 'data/file2.txt', 'data/file10.txt']

    >>> paths, parsed = glob('data/subj_{id}/run_{run}.txt', parse=True)
    >>> paths
    ['data/subj_001/run_01.txt', 'data/subj_001/run_02.txt']
    >>> parsed
    [{'id': '001', 'run': '01'}, {'id': '001', 'run': '02'}]

    >>> glob('data/subj_{id}/run_{run}.txt', ensure_one=True)
    AssertionError  # if the number of matching files is not exactly one
    """
    # Convert Path objects to strings for consistency
    if isinstance(expression, Path):
        expression = str(expression)

    # Placeholders such as ``{id}`` are not glob syntax; turn each one into a
    # wildcard so the filesystem lookup matches any value at that position.
    glob_pattern = _re.sub(r"{[^}]*}", "*", expression)

    # Enable recursive globbing for ** patterns
    recursive = "**" in glob_pattern

    # The pattern is handed to glob verbatim. It must never be passed through
    # eval(): a pattern that happens to be a valid Python expression would be
    # executed (arbitrary code execution) or resolved to an unrelated value.
    found_paths = _natsorted(_glob(glob_pattern, recursive=recursive))

    # Raise explicitly instead of using ``assert`` so the check still runs
    # under ``python -O``; the exception type is unchanged for callers.
    if ensure_one and len(found_paths) != 1:
        raise AssertionError(
            f"Expected exactly one match for {expression!r}, "
            f"found {len(found_paths)}"
        )

    if parse:
        parsed = [_parse(found_path, expression) for found_path in found_paths]
        return found_paths, parsed
    return found_paths
def parse_glob(expression: Union[str, Path], ensure_one=False):
    """Run :func:`glob` with parsing switched on.

    Shorthand for ``glob(expression, parse=True, ensure_one=ensure_one)``.

    Parameters
    ----------
    expression : str or Path
        The glob pattern, forwarded unchanged to :func:`glob`.
    ensure_one : bool, optional
        Forwarded to :func:`glob`; requires exactly one match.
        Default is False.

    Returns
    -------
    tuple
        ``(paths, parsed_results)`` exactly as returned by :func:`glob`
        when called with ``parse=True``.

    Examples
    --------
    >>> paths, parsed = parse_glob('data/subj_{id}/run_{run}.txt')
    >>> paths
    ['data/subj_001/run_01.txt', 'data/subj_001/run_02.txt']
    >>> parsed
    [{'id': '001', 'run': '01'}, {'id': '001', 'run': '02'}]

    >>> parse_glob('data/subj_{id}/run_{run}.txt', ensure_one=True)
    AssertionError  # if the number of matching files is not exactly one
    """
    matched_and_parsed = glob(expression, ensure_one=ensure_one, parse=True)
    return matched_and_parsed
# EOF