phml.utils.transform.extract
# pylint: disable=missing-module-docstring
from phml.nodes import AST, All_Nodes, Comment, Element, Root, Text


def to_string(node: AST | All_Nodes) -> str | None:
    """Get the raw text content of a node.

    Works similarly to the DOM's ``Node#textContent`` getter.

    Args:
        node (AST | Root | Element | Text | Comment): Node to get the text
            content from. An ``AST`` is unwrapped to its ``tree`` first.

    Returns:
        str | None: For ``Text`` and ``Comment`` nodes, the node's ``value``.
        For ``Root`` and ``Element`` nodes, the values of every descendant
        ``Text`` node joined together in document order with no separator
        (``Comment`` descendants are skipped). ``None`` for any other node
        type.
    """
    if isinstance(node, AST):
        node = node.tree

    if isinstance(node, (Text, Comment)):
        return node.value

    if not isinstance(node, (Root, Element)):
        # Not a literal text node and not a container: there is no text
        # content to report. Callers must be prepared for None here.
        return None

    def iter_text(parent: Element | Root):
        """Yield the value of each descendant Text node, depth first."""
        for child in parent.children:
            if isinstance(child, (Element, Root)):
                yield from iter_text(child)
            elif isinstance(child, Text):
                yield child.value

    return "".join(iter_text(node))
def
to_string( node: phml.nodes.AST.AST | phml.nodes.root.Root | phml.nodes.element.Element | phml.nodes.text.Text | phml.nodes.comment.Comment | phml.nodes.doctype.DocType | phml.nodes.parent.Parent | phml.nodes.node.Node | phml.nodes.literal.Literal) -> str:
def to_string(node: AST | All_Nodes) -> str | None:
    """Get the raw text content of a node.

    Works similarly to the DOM's ``Node#textContent`` getter.

    Args:
        node (AST | Root | Element | Text | Comment): Node to get the text
            content from. An ``AST`` is unwrapped to its ``tree`` first.

    Returns:
        str | None: For ``Text`` and ``Comment`` nodes, the node's ``value``.
        For ``Root`` and ``Element`` nodes, the values of every descendant
        ``Text`` node joined together in document order with no separator
        (``Comment`` descendants are skipped). ``None`` for any other node
        type.
    """
    if isinstance(node, AST):
        node = node.tree

    if isinstance(node, (Text, Comment)):
        return node.value

    if not isinstance(node, (Root, Element)):
        # Not a literal text node and not a container: there is no text
        # content to report. Callers must be prepared for None here.
        return None

    def iter_text(parent: Element | Root):
        """Yield the value of each descendant Text node, depth first."""
        for child in parent.children:
            if isinstance(child, (Element, Root)):
                yield from iter_text(child)
            elif isinstance(child, Text):
                yield child.value

    return "".join(iter_text(node))
Get the raw text content of the element. Works similarly to the DOM's Node#textContent getter.
Args
- node (AST | Root | Element | Text | Comment): Node to get the text content from. An AST is unwrapped to its tree first.
Returns
str | None: Raw inner text without formatting, or None when the node is not a Root, Element, Text, or Comment.