Coverage for src/file_tree/parse_tree.py: 85%
146 statements
« prev ^ index » next coverage.py v7.6.9, created at 2024-12-17 13:27 +0000
« prev ^ index » next coverage.py v7.6.9, created at 2024-12-17 13:27 +0000
1"""Parse a string representation of a FileTree."""
2import os.path as op
3import re
4import sys
5from contextlib import contextmanager
6from glob import glob
7from pathlib import Path
8from typing import Dict, List, Optional, Tuple, Union
10from .file_tree import FileTree
11from .template import Template
13if sys.version_info > (3, 10):
14 from importlib import metadata as importlib_metadata
15else:
16 import importlib_metadata
# searching for a file-tree

#: List of directories to look for FileTrees (searched in order).
tree_directories = ["."]

#: Trees that can be found by name in addition to the ``*.tree`` files in
#: :data:`tree_directories`; consulted by :func:`search_tree` and
#: :func:`list_all_trees` after :func:`scan_plugins` has run.
available_subtrees: Dict[str, Union[FileTree, str, Path]] = {}

#: Module names of the plugin entry points that :func:`scan_plugins` already ran.
scanned_plugins = set()
@contextmanager
def extra_tree_dirs(extra_dirs):
    """Put ``extra_dirs`` in front of :attr:`tree_directories` inside a ``with`` block.

    The module-level ``tree_directories`` name is rebound to a new list for the
    duration of the block and rebound to a copy of its previous content on exit,
    including when the block raises.

    :arg extra_dirs: Sequence of additional tree file directories to search.
    """
    global tree_directories

    saved = list(tree_directories)
    tree_directories = [*extra_dirs, *tree_directories]
    try:
        yield
    finally:
        # Always restore, even if the body of the ``with`` block failed.
        tree_directories = saved
def scan_plugins():
    """Load and call every not-yet-scanned ``file_tree.trees`` entry point.

    Each entry point is loaded and the resulting object is called without
    arguments. The entry point's module name is then recorded in
    ``scanned_plugins`` so that entry points from that module are skipped on
    later scans.
    """
    entry_points = importlib_metadata.entry_points(group="file_tree.trees")
    for entry_point in entry_points:
        module_name = entry_point.module
        if module_name not in scanned_plugins:
            register = entry_point.load()
            register()
            scanned_plugins.add(module_name)
def search_tree(name: str) -> Union[Path, str, FileTree]:
    """
    Find the definition of the tree called ``name``.

    The directories in ``tree_directories`` are tried in order; the first existing
    file ``<directory>/<name>`` (with ``.tree`` appended when missing) wins.
    If no such file exists, the plugins are scanned and ``available_subtrees`` is
    checked for ``<name>.tree`` and then for ``<name>``.

    :param name: Name of the tree
    :return: path to the tree file if one was found on disk; otherwise the entry
        stored in ``available_subtrees``
    :raises ValueError: if the tree is found in neither place
    """
    for directory in tree_directories:
        candidate = op.join(directory, name)
        if not candidate.endswith(".tree"):
            candidate += ".tree"
        if op.exists(candidate):
            return Path(candidate)

    # Nothing on disk: fall back to trees registered by plugins.
    scan_plugins()

    for key in (name + ".tree", name):
        if key in available_subtrees:
            return available_subtrees[key]

    raise ValueError("No file tree found for %s" % name)
def list_all_trees() -> List[str]:
    """Return the names of all trees that can currently be found.

    The result starts with the keys of ``available_subtrees`` (after scanning the
    installed plugins), followed by the base names of the ``*.tree`` files in each
    of the ``tree_directories`` (default just the current directory).
    """
    scan_plugins()
    names = [*available_subtrees]
    for directory in tree_directories:
        pattern = op.join(directory, "*.tree")
        names += [op.basename(path) for path in glob(pattern)]
    return names
95# reading the file-tree
def read_file_tree_text(lines: List[str], top_level: Union[str, Template]) -> FileTree:
    """Build a FileTree from the lines of a file-tree definition.

    Every line is handed to :func:`add_line_to_tree`, which updates the tree in
    place and keeps track of the candidate parent templates. Placeholder links
    collected along the way are applied once all lines have been read.

    Args:
        lines (List[str]): Individual lines read from a file-tree file
        top_level (Template): top-level template

    Returns:
        FileTree: tree read from the file
    """
    tree = FileTree.empty(top_level)
    parents: List[Tuple[int, str]] = []
    pending_links: List[List[str]] = []
    for text in lines:
        parents = add_line_to_tree(tree, text, parents, pending_links)
    # Links are applied last, after every line has been processed.
    for linked_keys in pending_links:
        tree.placeholders.link(*linked_keys)
    return tree
def add_line_to_tree(
    tree: FileTree, line: str, current: List[Tuple[int, Template]], to_link: List[List[str]]
) -> List[Tuple[int, Optional[Template]]]:
    """Add template or sub-tree defined on this file.

    There are 5 options for the line:

    1. Empty lines or those containing only comments (start with #) do nothing.
    2. Templates have the form "  <unique part> (<short name>)" and are added as a new template (note that the <short name> is optional).
       An unindented template starting with "!" is added without a parent.
    3. Placeholder values have the form "<key> = <value>" and are stored as placeholder values in the tree.
       The value "None" is stored as ``None``; a comma-separated value is stored as a list (with "None" items stored as ``None``).
    4. Sub-trees have the form "  -><tree name> [<placeholder>=<value>,...] (<short name>)" and are added as a new sub-tree
    5. Linkages between placeholder values are indicated by "&LINK <placeholder>,..."

    The parent directory of the new template or sub-tree is determined by the amount of white space.

    Args:
        tree: tree containing all templates/sub-trees read so far (will be updated in place with new template or sub-tree).
        line: current line from the file-tree definition.
        current: sequence of the possible parent directories and their indentation.
        to_link: continuously updated list of which placeholders to link after reading tree.

    Raises:
        ValueError: raised for a variety of formatting errors.

    Returns:
        New sequence of the possible parent directories after reading the line
    """
    # Everything after the first "#" is a comment.
    stripped = line.split("#")[0].strip()
    if not stripped:
        return current
    nspaces = line.index(stripped)
    parent = get_parent(nspaces, current)
    # Only less-indented entries remain possible parents for later lines.
    new_current = [(n, template) for n, template in current if n < nspaces]

    if stripped[:2] == "->":
        sub_tree, short_names = read_subtree_line(stripped)
        tree.add_subtree(sub_tree, short_names, parent)
        # ``None`` marks a sub-tree, which get_parent refuses to use as a parent.
        new_current.append((nspaces, None))
    elif "=" in stripped:
        key, value = [s.strip() for s in stripped.split("=")]
        if value == "None":
            # A lone "None" has no commas to split; testing ``"," in None``
            # would raise TypeError, so the two cases are mutually exclusive.
            value = None
        elif "," in value:
            value = [
                None if v.strip() == "None" else v.strip() for v in value.split(",")
            ]
        tree.update(inplace=True, **{key: value})
    elif stripped.startswith("&LINK"):
        keys = [k.strip() for k in stripped[5:].split(",")]
        to_link.append(keys)
    else:
        if stripped[0] == "!":
            if nspaces != 0:
                raise ValueError(
                    f"Defining a new top-level with '!' is only available at the top-level, but the line '{stripped}' is indented"
                )
            stripped = stripped[1:]
            real_parent = None
        else:
            real_parent = parent
        unique_part, short_names = read_line(stripped)
        # "name ()" yields an empty short name, which is dropped here.
        if short_names is not None and "" in short_names:
            short_names.remove("")
        template = tree.add_template(unique_part, short_names, real_parent)
        new_current.append((nspaces, template))
    return new_current
def get_parent(nspaces: int, current: List[Tuple[int, Template]]) -> Template:
    """Pick the parent of a new line from its indentation.

    A line indented deeper than the last entry of ``current`` becomes a child of
    that entry. Otherwise the line must share its indentation with an earlier
    entry, and the parent is the entry just before that one.

    Args:
        nspaces (int): amount of whitespace before the new line
        current: sequence of possible parents and their indentation; the parent is ``None`` for sub-trees

    Raises:
        ValueError: raised if the parent is a sub-tree rather than a template
        ValueError: raised if the number of spaces does not match any existing entry

    Returns:
        empty string if the parent is the top-level directory; the matching parent entry from ``current`` otherwise
    """
    if not current:
        return ""

    deepest_indent, deepest_parent = current[-1]
    if nspaces > deepest_indent:
        if deepest_parent is None:
            raise ValueError(
                "Current line seems to be the child of a sub-tree, which is not supported."
            )
        return deepest_parent

    # Walk from the outermost entry inwards, remembering the entry seen just
    # before the one with matching indentation ("" stands for the top level).
    previous = ""
    for indent, candidate in current:
        if indent == nspaces:
            return previous
        previous = candidate
    raise ValueError(
        "Number of spaces of current line does not match any previous lines."
    )
def check_forbidden_characters(text, characters, text_type):
    """
    Reject ``text`` if it contains any of the forbidden ``characters``.

    The error message lists the offending characters in the order in which they
    appear in ``characters``.

    :param text: string with the text
    :param characters: sequence of forbidden characters
    :param text_type: type of the text to raise in error message
    :raises ValueError: if at least one forbidden character is found
    """
    offending = [candidate for candidate in characters if candidate in text]
    if not offending:
        return
    found = "".join(offending)
    raise ValueError(
        'Invalid character(s) "{}" in {}: {}'.format(found, text_type, text)
    )
def read_line(line: str) -> Tuple[Union[FileTree, str], List[Optional[str]]]:
    """
    Parse line from the tree file.

    Lines starting with ``->`` (after stripping whitespace) define a sub-tree and
    are forwarded to :func:`read_subtree_line`. Any other line is a template of
    the form ``<unique part> (<short name>[,<short name>...])``, where the part in
    parentheses is optional.

    :param line: input line from a ``*.tree`` file
    :return: Tuple with:

        - unique part of the filename (or the sub-tree for ``->`` lines)
        - list of short names of the file (None if not provided)
    :raises ValueError: if the name contains a forbidden character or the line
        matches neither form
    """
    # The marker is two characters long, so a one-character slice could never
    # equal it; ``startswith`` makes the sub-tree branch reachable.
    if line.strip().startswith("->"):
        return read_subtree_line(line)

    # Template with explicit short name(s) in parentheses.
    match = re.match(r"^(\s*)(\S*)\s*\((\S*)\)\s*$", line)
    if match is not None:
        unique_part, names = match.group(2, 3)
        check_forbidden_characters(unique_part, r'<>"|', "file or directory name")
        if "," in names:
            short_names = [
                name.strip() for name in names.split(",") if len(name.strip()) > 0
            ]
        else:
            short_names = [names.strip()]
        return unique_part, short_names

    # Template without short name.
    match = re.match(r"^(\s*)(\S*)\s*$", line)
    if match is not None:
        unique_part = match.group(2)
        check_forbidden_characters(unique_part, r'<>"|', "file or directory name")
        return unique_part, None
    raise ValueError("Unrecognized line %s" % line)
def read_subtree_line(line: str) -> Tuple[FileTree, List[Optional[str]]]:
    """
    Parse a line of the form ``-><tree name> [<key>=<value>,...] (<short name>)``.

    The named tree is read with ``FileTree.read``, passing the ``<key>=<value>``
    pairs as keyword arguments.

    :param line: input line from a ``*.tree`` file
    :return: Tuple with

        - sub_tree
        - list of short names of the sub_tree (``[None]`` if not provided)
    :raises ValueError: if the line cannot be parsed, a name contains a forbidden
        character, or a variable is not of the form ``<key>=<value>``
    """
    short_names: List[Optional[str]]
    named = re.match(r"^(\s*)->\s*(\S*)(.*)\((\S*)\)", line)
    if named is not None:
        _, type_name, variables_str, full_short_name = named.groups()
        check_forbidden_characters(full_short_name, r"(){}/", "sub-tree name")
        if "," in full_short_name:
            candidates = (name.strip() for name in full_short_name.split(","))
            short_names = [name for name in candidates if len(name) > 0]
        else:
            short_names = [full_short_name]
    else:
        # No "(...)" part on the line: the sub-tree has no short name.
        unnamed = re.match(r"^(\s*)->\s*(\S*)(.*)", line)
        if unnamed is None:
            raise ValueError(
                "Sub-tree line could not be parsed: {}".format(line.strip())
            )
        _, type_name, variables_str = unnamed.groups()
        short_names = [None]

    check_forbidden_characters(type_name, r'<>:"/\|?*', "filename of sub-tree")

    variables = {}
    if variables_str.strip():
        for assignment in variables_str.split(","):
            key, value = assignment.split("=")
            variables[key.strip()] = value.strip()

    return FileTree.read(type_name, **variables), short_names