phml.components

  1import os
  2from pathlib import Path
  3from re import finditer
  4from time import time
  5from typing import Any, Iterator, TypedDict, overload
  6
  7from .embedded import Embedded
  8from .helpers import iterate_nodes
  9from .nodes import Element, Literal
 10from .parser import HypertextMarkupParser
 11
 12__all__ = ["ComponentType", "ComponentManager", "tokenize_name"]
 13
 14
class ComponentType(TypedDict):
    """Dictionary shape of a single component held by `ComponentManager`.

    The field notes below describe what `ComponentManager.parse` stores in
    each key; a dict passed to `ComponentManager.add` directly only has to
    satisfy `ComponentManager.validate`.
    """

    # Scope identifier: `parse` sets "~<hash_component result>" and `add`
    # prefixes it with the component name.
    hash: str
    # Value popped from the embedded python context under the key "Props"
    # ({} when that key is absent).
    props: dict[str, Any]
    # What is left of the embedded python context once "Props" is removed.
    context: dict[str, Any]
    # Top-level `script` elements that have exactly one child, a text literal.
    scripts: list[Element]
    # Top-level `style` elements that have exactly one child, a text literal.
    styles: list[Element]
    # All remaining top-level elements and literals.
    elements: list[Element | Literal]
 22
 23
class ComponentCacheType(TypedDict):
    """Subset of a component that `ComponentManager.cache` stores per key."""

    # Copied from the component's "hash" entry.
    hash: str
    # Copied from the component's "scripts" entry (same list object).
    scripts: list[Element]
    # Copied from the component's "styles" entry (same list object).
    styles: list[Element]
 28
 29
 30def DEFAULT_COMPONENT() -> ComponentType:
 31    return {
 32        "hash": "",
 33        "props": {},
 34        "context": {},
 35        "scripts": [],
 36        "styles": [],
 37        "elements": [],
 38    }
 39
 40
 41def tokenize_name(
 42    name: str,
 43    *,
 44    normalize: bool = False,
 45    title_case: bool = False,
 46) -> list[str]:
 47    """Generates name tokens `some name tokanized` from a filename.
 48    Assumes filenames is one of:
 49    * snakecase - some_file_name
 50    * camel case - someFileName
 51    * pascal case - SomeFileName
 52
 53    Args:
 54        name (str): File name without extension
 55        normalize (bool): Make all tokens fully lowercase. Defaults to True
 56
 57    Returns:
 58        list[str]: List of word tokens.
 59    """
 60    tokens = []
 61    for token in finditer(
 62        r"([A-Z])?([a-z]+)|([0-9]+)|([A-Z]+)(?=[^a-z])",
 63        name.strip(),
 64    ):
 65        first, rest, nums, cap = token.groups()
 66
 67        result = ""
 68        if rest is not None:
 69            result = (first or "") + rest
 70        elif cap is not None:
 71            # Token is all caps. Set to full capture
 72            result = cap
 73        elif nums is not None:
 74            # Token is all numbers. Set to full capture
 75            result = str(nums)
 76
 77        if normalize:
 78            result = result.lower()
 79
 80        if len(result) > 0:
 81            if title_case:
 82                result = result[0].upper() + result[1:]
 83            tokens.append(result)
 84    return tokens
 85
 86
 87def _parse_cmpt_name(name: str) -> str:
 88    tokens = tokenize_name(name.rsplit(".", 1)[0], normalize=True, title_case=True)
 89    return "".join(tokens)
 90
 91
 92def hash_component(cmpt: ComponentType):
 93    """Hash a component for applying unique scope identifier"""
 94    return (
 95        sum(hash(element) for element in cmpt["elements"])
 96        + sum(hash(style) for style in cmpt["styles"])
 97        + sum(hash(script) for script in cmpt["scripts"])
 98        - int(time()%1000) 
 99    )
100
101
class ComponentManager:
    """Registry of parsed components keyed by component name.

    Components are registered with `add`, either from a file (the name is
    derived from the file path) or from an explicit name plus component
    source text or an already parsed `ComponentType` dict. A separate cache
    of component hashes, scripts and styles is filled through `cache`.
    """

    components: dict[str, ComponentType]

    def __init__(self) -> None:
        """Create an empty manager with its own parser and an empty cache."""
        self.components = {}
        self._parser = HypertextMarkupParser()
        self._cache: dict[str, ComponentCacheType] = {}

    def generate_name(self, path: str, ignore: str = "") -> str:
        """Generate a component name based on its path.

        The path is made relative to `ignore`. Every directory part gets its
        first character upper-cased and the rest lower-cased, the file name
        is converted with `_parse_cmpt_name`, and the parts are joined with
        `.`. Ex: `blog/post_card.phml` == `Blog.PostCard`

        Args:
            path (str): Path to the component file.
            ignore (str): The path prefix to strip from the beginning.

        Returns:
            str: The dotted component name.
        """
        relative = Path(os.path.relpath(path, ignore)).as_posix()
        *folders, filename = relative.split("/")

        segments = [folder[0].upper() + folder[1:].lower() for folder in folders]
        segments.append(_parse_cmpt_name(filename))
        return ".".join(segments)

    def get_cache(self) -> dict[str, ComponentCacheType]:
        """Get the current cache of component scripts and styles"""
        return self._cache

    def cache(self, key: str, value: ComponentType):
        """Add a cache for a specific component. Will only add the cache if
        the key is not cached yet; an existing entry is never overwritten.
        """
        if key in self._cache:
            return

        self._cache[key] = {
            "hash": value["hash"],
            "scripts": value["scripts"],
            "styles": value["styles"],
        }

    def parse(self, content: str, path: str = "") -> ComponentType:
        """Parse component source text into a `ComponentType`.

        Every `python` element found in the tree is folded into the
        component's embedded context and detached from its parent. The
        remaining top-level nodes are sorted into scripts, styles and
        elements.

        Args:
            content (str): The source text of the component.
            path (str): Path handed to `Embedded` for every python block.

        Returns:
            ComponentType: The parsed component with its hash set to
                `~` followed by the result of `hash_component`.

        Raises:
            ValueError: If no top-level element or literal is left.
        """
        ast = self._parser.parse(content)

        component: ComponentType = DEFAULT_COMPONENT()
        context = Embedded("", path)

        # Collect the python elements before detaching any of them: removing
        # nodes while the traversal is still running risks skipping siblings.
        python_nodes = [
            node
            for node in iterate_nodes(ast)
            if isinstance(node, Element) and node.tag == "python"
        ]
        for node in python_nodes:
            context += Embedded(node, path)
            if node.parent is not None:
                node.parent.remove(node)

        for node in ast:
            if isinstance(node, Element):
                # Only script/style elements holding a single text child are
                # hoisted; any other shape stays a regular element.
                if node.tag == "script" and len(node) == 1 and Literal.is_text(node[0]):
                    component["scripts"].append(node)
                elif (
                    node.tag == "style" and len(node) == 1 and Literal.is_text(node[0])
                ):
                    component["styles"].append(node)
                else:
                    component["elements"].append(node)
            elif isinstance(node, Literal):
                component["elements"].append(node)

        component["props"] = context.context.pop("Props", {})
        component["context"] = context.context
        if not component["elements"]:
            raise ValueError("Must have at least one root element in component")
        component["hash"] = f"~{hash_component(component)}"

        return component

    @overload
    def add(self, file: str | Path, *, ignore: str = ""):
        """Add a component from a file; the name is generated from the path.

        Args:
            file (str): The file path to the component.
            ignore (str): The path prefix to remove before creating the component name.
        """
        ...

    @overload
    def add(self, *, name: str, data: str | ComponentType):
        """Add a component from a name and its source text or parsed dict.

        Args:
            name (str): The name of the component. This is the index/key in the component manager.
                This is also the name of the element in phml. Ex: `Some.Component` == `<Some.Component />`
            data (str | ComponentType): This is the data that is assigned in the manager. It can be a string
                representation of the component, or an already parsed component type dict.
        """
        ...

    def add(
        self,
        file: str | Path | None = None,
        *,
        name: str | None = None,
        data: str | ComponentType | None = None,
        ignore: str = "",
    ):
        """Add a component to the component manager with a file path. Also, components can be added to
        the component manager with a name and str or an already parsed component dict.

        Args:
            file (str): The file path to the component.
            ignore (str): The path prefix to remove before creating the component name.
            name (str): The name of the component. This is the index/key in the component manager.
                This is also the name of the element in phml. Ex: `Some.Component` == `<Some.Component />`
            data (str | ComponentType): This is the data that is assigned in the manager. It can be a string
                representation of the component, or an already parsed component type dict.

        Raises:
            ValueError: If neither `file` nor `name` is given, if `data` is
                an empty string or neither a string nor a dict, or if the
                resulting component fails `validate`.
        """
        content: ComponentType = DEFAULT_COMPONENT()
        if file is not None:
            # A file always wins: the name is derived from its path.
            file = Path(file)
            with file.open("r", encoding="utf-8") as c_file:
                name = self.generate_name(file.as_posix(), ignore)
                content.update(self.parse(c_file.read(), file.as_posix()))
        elif name is None:
            raise ValueError(
                "Expected both 'name' and 'data' kwargs to be used together",
            )
        elif isinstance(data, str):
            if data == "":
                raise ValueError(
                    "Expected component data to be a string of length longer that 0",
                )
            content.update(self.parse(data, "_cmpt_"))
        elif isinstance(data, dict):
            content.update(data)
        else:
            raise ValueError(
                "Expected component data to be a string or a ComponentType dict",
            )

        self.validate(content)
        # Prefix the hash with the component name.
        content["hash"] = name + content["hash"]
        self.components[name] = content

    def __iter__(self) -> Iterator[tuple[str, ComponentType]]:
        """Iterate over `(name, component)` pairs."""
        yield from self.components.items()

    def keys(self):
        """Return a view of the registered component names."""
        return self.components.keys()

    def values(self):
        """Return a view of the registered components."""
        return self.components.values()

    def __contains__(self, key: str) -> bool:
        return key in self.components

    def __getitem__(self, key: str) -> ComponentType:
        return self.components[key]

    def __setitem__(self, key: str, value: ComponentType):
        """Reject item assignment; components are registered through `add`."""
        # TODO: Custom error. TypeError is still caught by callers that
        # handle the bare Exception raised before.
        raise TypeError("Cannot set components from slice assignment")

    def remove(self, key: str):
        """Remove a component from the manager with a specific tag/name.

        Raises:
            KeyError: If `key` is not a registered component.
        """
        try:
            del self.components[key]
        except KeyError:
            raise KeyError(f"{key} is not a known component") from None

    def validate(self, data: ComponentType):
        """Check that `data` has the shape of a `ComponentType`.

        The `hash` entry is not checked.

        Raises:
            ValueError: If `props` or `context` is missing or not a dict, if
                `scripts`/`styles` is missing or not a list of elements
                tagged `script`/`style`, or if `elements` is missing, empty
                or holds anything other than `Element`/`Literal` nodes.
        """
        if "props" not in data or not isinstance(data["props"], dict):
            raise ValueError(
                "Expected ComponentType 'props' that is a dict of str to any value",
            )

        if "context" not in data or not isinstance(data["context"], dict):
            raise ValueError(
                "Expected ComponentType 'context' that is a dict of str to any value",
            )

        if (
            "scripts" not in data
            or not isinstance(data["scripts"], list)
            or not all(
                isinstance(script, Element) and script.tag == "script"
                for script in data["scripts"]
            )
        ):
            raise ValueError(
                "Expected ComponentType 'scripts' that is a list of phml elements with a tag of 'script'",
            )

        if (
            "styles" not in data
            or not isinstance(data["styles"], list)
            or not all(
                isinstance(style, Element) and style.tag == "style"
                for style in data["styles"]
            )
        ):
            raise ValueError(
                "Expected ComponentType 'styles' that is a list of phml elements with a tag of 'style'",
            )

        if (
            "elements" not in data
            or not isinstance(data["elements"], list)
            or not data["elements"]
            or not all(
                isinstance(element, (Element, Literal)) for element in data["elements"]
            )
        ):
            raise ValueError(
                "Expected ComponentType 'elements' to be a list of at least one Element or Literal",
            )
class ComponentType(typing.TypedDict):
class ComponentType(TypedDict):
    """Dictionary shape of a single component held by `ComponentManager`.

    The field notes below describe what `ComponentManager.parse` stores in
    each key; a dict passed to `ComponentManager.add` directly only has to
    satisfy `ComponentManager.validate`.
    """

    # Scope identifier: `parse` sets "~<hash_component result>" and `add`
    # prefixes it with the component name.
    hash: str
    # Value popped from the embedded python context under the key "Props"
    # ({} when that key is absent).
    props: dict[str, Any]
    # What is left of the embedded python context once "Props" is removed.
    context: dict[str, Any]
    # Top-level `script` elements that have exactly one child, a text literal.
    scripts: list[Element]
    # Top-level `style` elements that have exactly one child, a text literal.
    styles: list[Element]
    # All remaining top-level elements and literals.
    elements: list[Element | Literal]
Inherited Members
builtins.dict
get
setdefault
pop
popitem
keys
items
values
update
fromkeys
clear
copy
class ComponentManager:
class ComponentManager:
    """Registry of parsed components keyed by component name.

    Components are registered with `add`, either from a file (the name is
    derived from the file path) or from an explicit name plus component
    source text or an already parsed `ComponentType` dict. A separate cache
    of component hashes, scripts and styles is filled through `cache`.
    """

    components: dict[str, ComponentType]

    def __init__(self) -> None:
        """Create an empty manager with its own parser and an empty cache."""
        self.components = {}
        self._parser = HypertextMarkupParser()
        self._cache: dict[str, ComponentCacheType] = {}

    def generate_name(self, path: str, ignore: str = "") -> str:
        """Generate a component name based on its path.

        The path is made relative to `ignore`. Every directory part gets its
        first character upper-cased and the rest lower-cased, the file name
        is converted with `_parse_cmpt_name`, and the parts are joined with
        `.`. Ex: `blog/post_card.phml` == `Blog.PostCard`

        Args:
            path (str): Path to the component file.
            ignore (str): The path prefix to strip from the beginning.

        Returns:
            str: The dotted component name.
        """
        relative = Path(os.path.relpath(path, ignore)).as_posix()
        *folders, filename = relative.split("/")

        segments = [folder[0].upper() + folder[1:].lower() for folder in folders]
        segments.append(_parse_cmpt_name(filename))
        return ".".join(segments)

    def get_cache(self) -> dict[str, ComponentCacheType]:
        """Get the current cache of component scripts and styles"""
        return self._cache

    def cache(self, key: str, value: ComponentType):
        """Add a cache for a specific component. Will only add the cache if
        the key is not cached yet; an existing entry is never overwritten.
        """
        if key in self._cache:
            return

        self._cache[key] = {
            "hash": value["hash"],
            "scripts": value["scripts"],
            "styles": value["styles"],
        }

    def parse(self, content: str, path: str = "") -> ComponentType:
        """Parse component source text into a `ComponentType`.

        Every `python` element found in the tree is folded into the
        component's embedded context and detached from its parent. The
        remaining top-level nodes are sorted into scripts, styles and
        elements.

        Args:
            content (str): The source text of the component.
            path (str): Path handed to `Embedded` for every python block.

        Returns:
            ComponentType: The parsed component with its hash set to
                `~` followed by the result of `hash_component`.

        Raises:
            ValueError: If no top-level element or literal is left.
        """
        ast = self._parser.parse(content)

        component: ComponentType = DEFAULT_COMPONENT()
        context = Embedded("", path)

        # Collect the python elements before detaching any of them: removing
        # nodes while the traversal is still running risks skipping siblings.
        python_nodes = [
            node
            for node in iterate_nodes(ast)
            if isinstance(node, Element) and node.tag == "python"
        ]
        for node in python_nodes:
            context += Embedded(node, path)
            if node.parent is not None:
                node.parent.remove(node)

        for node in ast:
            if isinstance(node, Element):
                # Only script/style elements holding a single text child are
                # hoisted; any other shape stays a regular element.
                if node.tag == "script" and len(node) == 1 and Literal.is_text(node[0]):
                    component["scripts"].append(node)
                elif (
                    node.tag == "style" and len(node) == 1 and Literal.is_text(node[0])
                ):
                    component["styles"].append(node)
                else:
                    component["elements"].append(node)
            elif isinstance(node, Literal):
                component["elements"].append(node)

        component["props"] = context.context.pop("Props", {})
        component["context"] = context.context
        if not component["elements"]:
            raise ValueError("Must have at least one root element in component")
        component["hash"] = f"~{hash_component(component)}"

        return component

    @overload
    def add(self, file: str | Path, *, ignore: str = ""):
        """Add a component from a file; the name is generated from the path.

        Args:
            file (str): The file path to the component.
            ignore (str): The path prefix to remove before creating the component name.
        """
        ...

    @overload
    def add(self, *, name: str, data: str | ComponentType):
        """Add a component from a name and its source text or parsed dict.

        Args:
            name (str): The name of the component. This is the index/key in the component manager.
                This is also the name of the element in phml. Ex: `Some.Component` == `<Some.Component />`
            data (str | ComponentType): This is the data that is assigned in the manager. It can be a string
                representation of the component, or an already parsed component type dict.
        """
        ...

    def add(
        self,
        file: str | Path | None = None,
        *,
        name: str | None = None,
        data: str | ComponentType | None = None,
        ignore: str = "",
    ):
        """Add a component to the component manager with a file path. Also, components can be added to
        the component manager with a name and str or an already parsed component dict.

        Args:
            file (str): The file path to the component.
            ignore (str): The path prefix to remove before creating the component name.
            name (str): The name of the component. This is the index/key in the component manager.
                This is also the name of the element in phml. Ex: `Some.Component` == `<Some.Component />`
            data (str | ComponentType): This is the data that is assigned in the manager. It can be a string
                representation of the component, or an already parsed component type dict.

        Raises:
            ValueError: If neither `file` nor `name` is given, if `data` is
                an empty string or neither a string nor a dict, or if the
                resulting component fails `validate`.
        """
        content: ComponentType = DEFAULT_COMPONENT()
        if file is not None:
            # A file always wins: the name is derived from its path.
            file = Path(file)
            with file.open("r", encoding="utf-8") as c_file:
                name = self.generate_name(file.as_posix(), ignore)
                content.update(self.parse(c_file.read(), file.as_posix()))
        elif name is None:
            raise ValueError(
                "Expected both 'name' and 'data' kwargs to be used together",
            )
        elif isinstance(data, str):
            if data == "":
                raise ValueError(
                    "Expected component data to be a string of length longer that 0",
                )
            content.update(self.parse(data, "_cmpt_"))
        elif isinstance(data, dict):
            content.update(data)
        else:
            raise ValueError(
                "Expected component data to be a string or a ComponentType dict",
            )

        self.validate(content)
        # Prefix the hash with the component name.
        content["hash"] = name + content["hash"]
        self.components[name] = content

    def __iter__(self) -> Iterator[tuple[str, ComponentType]]:
        """Iterate over `(name, component)` pairs."""
        yield from self.components.items()

    def keys(self):
        """Return a view of the registered component names."""
        return self.components.keys()

    def values(self):
        """Return a view of the registered components."""
        return self.components.values()

    def __contains__(self, key: str) -> bool:
        return key in self.components

    def __getitem__(self, key: str) -> ComponentType:
        return self.components[key]

    def __setitem__(self, key: str, value: ComponentType):
        """Reject item assignment; components are registered through `add`."""
        # TODO: Custom error. TypeError is still caught by callers that
        # handle the bare Exception raised before.
        raise TypeError("Cannot set components from slice assignment")

    def remove(self, key: str):
        """Remove a component from the manager with a specific tag/name.

        Raises:
            KeyError: If `key` is not a registered component.
        """
        try:
            del self.components[key]
        except KeyError:
            raise KeyError(f"{key} is not a known component") from None

    def validate(self, data: ComponentType):
        """Check that `data` has the shape of a `ComponentType`.

        The `hash` entry is not checked.

        Raises:
            ValueError: If `props` or `context` is missing or not a dict, if
                `scripts`/`styles` is missing or not a list of elements
                tagged `script`/`style`, or if `elements` is missing, empty
                or holds anything other than `Element`/`Literal` nodes.
        """
        if "props" not in data or not isinstance(data["props"], dict):
            raise ValueError(
                "Expected ComponentType 'props' that is a dict of str to any value",
            )

        if "context" not in data or not isinstance(data["context"], dict):
            raise ValueError(
                "Expected ComponentType 'context' that is a dict of str to any value",
            )

        if (
            "scripts" not in data
            or not isinstance(data["scripts"], list)
            or not all(
                isinstance(script, Element) and script.tag == "script"
                for script in data["scripts"]
            )
        ):
            raise ValueError(
                "Expected ComponentType 'scripts' that is a list of phml elements with a tag of 'script'",
            )

        if (
            "styles" not in data
            or not isinstance(data["styles"], list)
            or not all(
                isinstance(style, Element) and style.tag == "style"
                for style in data["styles"]
            )
        ):
            raise ValueError(
                "Expected ComponentType 'styles' that is a list of phml elements with a tag of 'style'",
            )

        if (
            "elements" not in data
            or not isinstance(data["elements"], list)
            or not data["elements"]
            or not all(
                isinstance(element, (Element, Literal)) for element in data["elements"]
            )
        ):
            raise ValueError(
                "Expected ComponentType 'elements' to be a list of at least one Element or Literal",
            )
ComponentManager()
    def __init__(self) -> None:
        """Create an empty manager with its own parser and an empty cache."""
        # Registered components, keyed by component name.
        self.components = {}
        # Parser used by `parse` to turn component source text into a tree.
        self._parser = HypertextMarkupParser()
        # Hash/scripts/styles entries added through `cache`.
        self._cache: dict[str, ComponentCacheType] = {}
def generate_name(self, path: str, ignore: str = '') -> str:
111    def generate_name(self, path: str, ignore: str = "") -> str:
112        """Generate a component name based on it's path. Optionally strip part of the path
113        from the beginning.
114        """
115
116        path = Path(os.path.relpath(path, ignore)).as_posix()
117        parts = path.split("/")
118
119        return ".".join(
120            [
121                *[part[0].upper() + part[1:].lower() for part in parts[:-1]],
122                _parse_cmpt_name(parts[-1]),
123            ],
124        )

Generate a component name based on its path. Optionally strip part of the path from the beginning.

def get_cache(self) -> dict[str, phml.components.ComponentCacheType]:
    def get_cache(self) -> dict[str, ComponentCacheType]:
        """Get the current cache of component scripts and styles.

        Returns the manager's own cache dict, not a copy.
        """
        return self._cache

Get the current cache of component scripts and styles

def cache(self, key: str, value: phml.components.ComponentType):
130    def cache(self, key: str, value: ComponentType):
131        """Add a cache for a specific component. Will only add the cache if
132        the component is new and unique.
133        """
134        if key not in self._cache:
135            self._cache[key] = {
136                "hash": value["hash"],
137                "scripts": value["scripts"],
138                "styles": value["styles"],
139            }

Add a cache for a specific component. Will only add the cache if the component is new and unique.

def parse(self, content: str, path: str = '') -> phml.components.ComponentType:
141    def parse(self, content: str, path: str = "") -> ComponentType:
142        ast = self._parser.parse(content)
143
144        component: ComponentType = DEFAULT_COMPONENT()
145        context = Embedded("", path)
146
147        for node in iterate_nodes(ast):
148            if isinstance(node, Element) and node.tag == "python":
149                context += Embedded(node, path)
150                if node.parent is not None:
151                    node.parent.remove(node)
152
153        for node in ast:
154            if isinstance(node, Element):
155                if node.tag == "script" and len(node) == 1 and Literal.is_text(node[0]):
156                    component["scripts"].append(node)
157                elif (
158                    node.tag == "style" and len(node) == 1 and Literal.is_text(node[0])
159                ):
160                    component["styles"].append(node)
161                else:
162                    component["elements"].append(node)
163            elif isinstance(node, Literal):
164                component["elements"].append(node)
165
166        component["props"] = context.context.pop("Props", {})
167        component["context"] = context.context
168        if len(component["elements"]) == 0:
169            raise ValueError("Must have at least one root element in component")
170        component["hash"] = f"~{hash_component(component)}"
171
172        return component
def add( self, file: str | pathlib.Path | None = None, *, name: str | None = None, data: str | phml.components.ComponentType | None = None, ignore: str = ''):
204    def add(
205        self,
206        file: str | Path | None = None,
207        *,
208        name: str | None = None,
209        data: str | ComponentType | None = None,
210        ignore: str = "",
211    ):
212        """Add a component to the component manager with a file path. Also, componetes can be added to
213        the component manager with a name and str or an already parsed component dict.
214
215        Args:
216            file (str): The file path to the component.
217            ignore (str): The path prefix to remove before creating the comopnent name.
218            name (str): The name of the component. This is the index/key in the component manager.
219                This is also the name of the element in phml. Ex: `Some.Component` == `<Some.Component />`
220            data (str | ComponentType): This is the data that is assigned in the manager. It can be a string
221                representation of the component, or an already parsed component type dict.
222        """
223        content: ComponentType = DEFAULT_COMPONENT()
224        if file is None:
225            if name is None:
226                raise ValueError(
227                    "Expected both 'name' and 'data' kwargs to be used together",
228                )
229            if isinstance(data, str):
230                if data == "":
231                    raise ValueError(
232                        "Expected component data to be a string of length longer that 0",
233                    )
234                content.update(self.parse(data, "_cmpt_"))
235            elif isinstance(data, dict):
236                content.update(data)
237            else:
238                raise ValueError(
239                    "Expected component data to be a string or a ComponentType dict",
240                )
241        else:
242            file = Path(file)
243            with file.open("r", encoding="utf-8") as c_file:
244                name = self.generate_name(file.as_posix(), ignore)
245                content.update(self.parse(c_file.read(), file.as_posix()))
246
247        self.validate(content)
248        content["hash"] = name + content["hash"]
249        self.components[name] = content

Add a component to the component manager with a file path. Also, components can be added to the component manager with a name and str or an already parsed component dict.

Args
  • file (str): The file path to the component.
  • ignore (str): The path prefix to remove before creating the component name.
  • name (str): The name of the component. This is the index/key in the component manager. This is also the name of the element in phml. Ex: Some.Component == <Some.Component />
  • data (str | ComponentType): This is the data that is assigned in the manager. It can be a string representation of the component, or an already parsed component type dict.
def keys(self):
    def keys(self):
        """Return a view of the registered component names."""
        return self.components.keys()
def values(self):
    def values(self):
        """Return a view of the registered components."""
        return self.components.values()
def remove(self, key: str):
270    def remove(self, key: str):
271        """Remove a comopnent from the manager with a specific tag/name."""
272        if key not in self.components:
273            raise KeyError(f"{key} is not a known component")
274        del self.components[key]

Remove a component from the manager with a specific tag/name.

def validate(self, data: phml.components.ComponentType):
276    def validate(self, data: ComponentType):
277        if "props" not in data or not isinstance(data["props"], dict):
278            raise ValueError(
279                "Expected ComponentType 'props' that is a dict of str to any value",
280            )
281
282        if "context" not in data or not isinstance(data["context"], dict):
283            raise ValueError(
284                "Expected ComponentType 'context' that is a dict of str to any value",
285            )
286
287        if (
288            "scripts" not in data
289            or not isinstance(data["scripts"], list)
290            or not all(
291                isinstance(script, Element) and script.tag == "script"
292                for script in data["scripts"]
293            )
294        ):
295            raise ValueError(
296                "Expected ComponentType 'script' that is alist of phml elements with a tag of 'script'",
297            )
298
299        if (
300            "styles" not in data
301            or not isinstance(data["styles"], list)
302            or not all(
303                isinstance(style, Element) and style.tag == "style"
304                for style in data["styles"]
305            )
306        ):
307            raise ValueError(
308                "Expected ComponentType 'styles' that is a list of phml elements with a tag of 'style'",
309            )
310
311        if (
312            "elements" not in data
313            or not isinstance(data["elements"], list)
314            or len(data["elements"]) == 0
315            or not all(
316                isinstance(element, (Element, Literal)) for element in data["elements"]
317            )
318        ):
319            raise ValueError(
320                "Expected ComponentType 'elements' to be a list of at least one Element or Literal",
321            )
def tokenize_name( name: str, *, normalize: bool = False, title_case: bool = False) -> list[str]:
def tokenize_name(
    name: str,
    *,
    normalize: bool = False,
    title_case: bool = False,
) -> list[str]:
    """Split a file name into word tokens.

    Handles snake case (`some_file_name`), camel case (`someFileName`) and
    pascal case (`SomeFileName`). Runs of digits and runs of capital letters
    become their own tokens; every other character acts as a separator and
    is dropped.

    Args:
        name (str): File name without extension.
        normalize (bool): Make all tokens fully lowercase. Defaults to False.
        title_case (bool): Upper-case the first character of every token.
            Applied after `normalize`. Defaults to False.

    Returns:
        list[str]: List of word tokens in order of appearance.
    """
    tokens = []
    for match in finditer(
        # The negative lookahead (instead of `(?=[^a-z])`) also succeeds at
        # the end of the string, so a trailing all-caps run such as the
        # `URL` in `someURL` is kept whole instead of losing its last letter.
        r"([A-Z])?([a-z]+)|([0-9]+)|([A-Z]+)(?![a-z])",
        name.strip(),
    ):
        first, rest, nums, cap = match.groups()

        if rest is not None:
            # Regular word with an optional leading capital.
            word = (first or "") + rest
        else:
            # All-caps run or all-digit run: use the full capture.
            word = cap or nums or ""

        if normalize:
            word = word.lower()

        if word:
            if title_case:
                word = word[0].upper() + word[1:]
            tokens.append(word)
    return tokens

Generates name tokens (`some name tokenized`) from a filename. Assumes the filename is one of:

  • snakecase - some_file_name
  • camel case - someFileName
  • pascal case - SomeFileName
Args
  • name (str): File name without extension
  • normalize (bool): Make all tokens fully lowercase. Defaults to False
Returns

list[str]: List of word tokens.