Coverage for src/file_tree/parse_tree.py: 85%

146 statements  

« prev     ^ index     » next       coverage.py v7.6.9, created at 2024-12-17 13:27 +0000

1"""Parse a string representation of a FileTree.""" 

2import os.path as op 

3import re 

4import sys 

5from contextlib import contextmanager 

6from glob import glob 

7from pathlib import Path 

8from typing import Dict, List, Optional, Tuple, Union 

9 

10from .file_tree import FileTree 

11from .template import Template 

12 

13if sys.version_info > (3, 10): 

14 from importlib import metadata as importlib_metadata 

15else: 

16 import importlib_metadata 

17 

# searching for a file-tree

"""List of directories to look for FileTrees
"""
# Search path for ``*.tree`` files: ``search_tree`` tries these directories in order and
# ``extra_tree_dirs`` temporarily prepends to it. Starts out as just the current directory.
tree_directories = ["."]

# Named trees that ``search_tree`` falls back on (looked up with and without the ``.tree``
# suffix) when no matching file is found in ``tree_directories``.
# NOTE(review): presumably filled in by the plugin callables run from ``scan_plugins`` -- confirm.
available_subtrees: Dict[str, Union[FileTree, str, Path]] = {}

# Modules of the plugin entry points that ``scan_plugins`` has already loaded and called,
# so that every plugin is only run once.
scanned_plugins = set()

27 

28 

@contextmanager
def extra_tree_dirs(extra_dirs):
    """Put ``extra_dirs`` in front of :attr:`tree_directories` while the ``with`` block runs.

    The search path that was active on entry is restored on exit, also when the
    body of the ``with`` block raises.

    :arg extra_dirs: Sequence of additional tree file directories to search.
    """
    global tree_directories

    # Snapshot (copy) of the search path, so it can be put back afterwards.
    saved_directories = list(tree_directories)
    tree_directories = [*extra_dirs, *saved_directories]
    try:
        yield
    finally:
        tree_directories = saved_directories

45 

46 

def scan_plugins():
    """Load and call every not-yet-scanned plugin registered under ``file_tree.trees``.

    The module of each handled entry point is recorded in ``scanned_plugins``,
    so calling this function again skips plugins that already ran.
    """
    for entry_point in importlib_metadata.entry_points(group="file_tree.trees"):
        if entry_point.module not in scanned_plugins:
            # The entry point resolves to a callable, which is invoked without arguments.
            register = entry_point.load()
            register()
            scanned_plugins.add(entry_point.module)

55 

56 

def search_tree(name: str) -> Union[Path, str, FileTree]:
    """
    Locate the definition of the tree called ``name``.

    Every directory in ``tree_directories`` is checked in order for a file named
    ``${name}.tree`` (the suffix is only appended if ``name`` does not already end with it).
    If no such file exists, the plugins are scanned and ``available_subtrees`` is
    consulted, first for ``name + ".tree"`` and then for ``name`` itself.

    :param name: Name of the tree
    :return: path to the matching file, or the entry stored in ``available_subtrees``
    :raises ValueError: if the tree is found in neither place
    """
    for directory in tree_directories:
        candidate = op.join(directory, name)
        if not candidate.endswith(".tree"):
            candidate += ".tree"
        if op.exists(candidate):
            return Path(candidate)

    scan_plugins()

    for key in (name + ".tree", name):
        if key in available_subtrees:
            return available_subtrees[key]

    raise ValueError("No file tree found for %s" % name)

81 

82 

def list_all_trees() -> List[str]:
    """List the names of all sub-trees that can currently be found.

    The result starts with the keys of ``available_subtrees`` (after scanning the installed
    file-tree plugins, e.g., `file-tree-fsl`), followed by the base names of the ``*.tree``
    files in each of the ``tree_directories`` (default just the current directory).
    """
    scan_plugins()
    found = list(available_subtrees.keys())
    for directory in tree_directories:
        pattern = op.join(directory, "*.tree")
        found += [op.basename(path) for path in glob(pattern)]
    return found

93 

94 

95# reading the file-tree 

96 

97 

def read_file_tree_text(lines: List[str], top_level: Union[str, Template]) -> FileTree:
    """Build a FileTree from the lines of a file-tree definition.

    Each line is handed to :func:`add_line_to_tree` (see there for the line syntax).
    Placeholder links requested by the lines are collected while reading and
    only applied once every line has been processed.

    Args:
        lines (List[str]): Individual lines read from a file-tree file
        top_level (Template): top-level template

    Returns:
        FileTree: tree read from the file
    """
    tree = FileTree.empty(top_level)
    pending_links: List[List[str]] = []
    # Candidate parents (with their indentation) for the next line.
    parents: List[Tuple[int, str]] = []
    for text in lines:
        parents = add_line_to_tree(tree, text, parents, pending_links)
    for linked_keys in pending_links:
        tree.placeholders.link(*linked_keys)
    return tree

118 

119 

def add_line_to_tree(
    tree: FileTree, line: str, current: List[Tuple[int, Template]], to_link: List[List[str]]
) -> List[Tuple[int, Optional[Template]]]:
    """Add template or sub-tree defined on this file.

    There are 5 options for the line:

    1. Empty lines or those containing only comments (start with #) do nothing.
    2. Templates have the form "  <unique part> (<short name>)" and are added as a new template (note that the <short name> is optional).
       A leading "!" (only allowed on lines without indentation) adds the template without a parent.
    3. Placeholder values have the form "<key> = <value>" and are stored as placeholder values in the tree.
       The value "None" is stored as ``None``; a comma-separated value is stored as a list (in which "None" also becomes ``None``).
    4. Sub-trees have the form "  -><tree name> [<placeholder>=<value>,...] (<short name>)" and are added as a new sub-tree
    5. Linkages between placeholder values are indicated by "&LINK <placeholder>,..."

    The parent directory of the new template or sub-tree is determined by the amount of white space.

    Args:
        tree: tree containing all templates/sub-trees read so far (will be updated in place with new template or sub-tree).
        line: current line from the file-tree definition.
        current: sequence of the possible parent directories and their indentation.
        to_link: continuously updated list of which placeholders to link after reading tree.

    Raises:
        ValueError: raised for a variety of formatting errors.

    Returns:
        New sequence of the possible parent directories after reading the line
    """
    # Everything after the first "#" is a comment.
    stripped = line.split("#")[0].strip()
    if not stripped:
        return current
    nspaces = line.index(stripped)
    parent = get_parent(nspaces, current)
    # Only entries that are less indented than this line remain possible parents.
    new_current = [(n, template) for n, template in current if n < nspaces]

    if stripped.startswith("->"):
        sub_tree, short_names = read_subtree_line(stripped)
        tree.add_subtree(sub_tree, short_names, parent)
        # Sub-trees are stored as None, so that get_parent can reject any children.
        new_current.append((nspaces, None))
    elif "=" in stripped:
        parts = [s.strip() for s in stripped.split("=")]
        if len(parts) != 2:
            raise ValueError(
                f"Placeholder lines should contain exactly one '=', but got '{stripped}'"
            )
        key, value = parts
        if value == "None":
            value = None
        elif "," in value:
            # Must be `elif`: testing `"," in None` would raise a TypeError.
            value = [
                None if v.strip() == "None" else v.strip() for v in value.split(",")
            ]
        tree.update(inplace=True, **{key: value})
    elif stripped.startswith("&LINK"):
        keys = [k.strip() for k in stripped[5:].split(",")]
        to_link.append(keys)
    else:
        if stripped[0] == "!":
            if nspaces != 0:
                raise ValueError(
                    f"Defining a new top-level with '!' is only available at the top-level, but the line '{stripped}' is indented"
                )
            stripped = stripped[1:]
            if stripped.lstrip().startswith("->"):
                # Sub-trees are only handled by the "->" branch above; reject them here
                # so that a sub-tree can never be passed on to `tree.add_template`.
                raise ValueError(
                    f"A sub-tree can not be used to define a new top-level with '!': '{stripped}'"
                )
            real_parent = None
        else:
            real_parent = parent
        unique_part, short_names = read_line(stripped)
        if short_names is not None and "" in short_names:
            short_names.remove("")
        template = tree.add_template(unique_part, short_names, real_parent)
        new_current.append((nspaces, template))
    return new_current

186 

187 

def get_parent(nspaces: int, current: List[Tuple[int, Template]]) -> Template:
    """Find the parent of a new line from its indentation.

    A line that is indented further than the last entry in ``current`` becomes the child
    of that entry. Otherwise the line has to align with one of the existing entries, in
    which case it shares the parent of that entry.

    Args:
        nspaces (int): amount of whitespace before the new line
        current (List[Tuple[int, str]]): sequence of possible parent directories and their indentation

    Raises:
        ValueError: raised if the parent is a sub-tree rather than a template
        ValueError: raised if the number of spaces does not match any existing directory

    Returns:
        empty string if the parent is the top-level directory; the parent stored in ``current`` otherwise
    """
    if not current:
        return ""

    deepest_indent, deepest_entry = current[-1]
    if nspaces > deepest_indent:
        # Sub-trees are stored as None and can not have children.
        if deepest_entry is None:
            raise ValueError(
                "Current line seems to be the child of a sub-tree, which is not supported."
            )
        return deepest_entry

    for position, (indent, _) in enumerate(current):
        if indent != nspaces:
            continue
        # Aligned with the first entry: the parent is the top-level directory.
        return current[position - 1][1] if position > 0 else ""

    raise ValueError(
        "Number of spaces of current line does not match any previous lines."
    )

221 

222 

def check_forbidden_characters(text, characters, text_type):
    """
    Reject text that contains any of the forbidden characters.

    Returns nothing if the text is fine; raises ValueError listing all offending
    characters (in the order in which they appear in ``characters``) otherwise.

    :param text: string with the text
    :param characters: sequence of forbidden characters
    :param text_type: type of the text to raise in error message
    """
    offending = list(filter(lambda character: character in text, characters))
    if offending:
        message = 'Invalid character(s) "{}" in {}: {}'.format(
            "".join(offending), text_type, text
        )
        raise ValueError(message)

238 

239 

def read_line(line: str) -> Tuple[Union[FileTree, str], List[Optional[str]]]:
    """
    Parse line from the tree file.

    Lines starting with ``->`` (after stripping whitespace) are passed on to
    :func:`read_subtree_line`. Any other line should consist of a single word without
    whitespace, optionally followed by the short name(s) between brackets.

    :param line: input line from a ``*.tree`` file
    :return: Tuple with:

        - unique part of the filename (or the sub-tree for a ``->`` line)
        - list of short names of the file (None if not provided for a template)
    :raises ValueError: if the line can not be parsed or the unique part contains one of ``<>"|``
    """
    # The marker has two characters, so comparing against a one-character slice
    # (`[:1] == "->"`) could never match.
    if line.strip().startswith("->"):
        return read_subtree_line(line)

    # "<unique part> (<short name>[,<short name>...])"
    match = re.match(r"^(\s*)(\S*)\s*\((\S*)\)\s*$", line)
    if match is not None:
        _, unique_part, full_short_name = match.groups()
        check_forbidden_characters(unique_part, r'<>"|', "file or directory name")
        if "," in full_short_name:
            short_names = [
                name.strip()
                for name in full_short_name.split(",")
                if len(name.strip()) > 0
            ]
        else:
            short_names = [full_short_name.strip()]
        return unique_part, short_names

    # "<unique part>" without any short name
    match = re.match(r"^(\s*)(\S*)\s*$", line)
    if match is not None:
        _, unique_part = match.groups()
        check_forbidden_characters(unique_part, r'<>"|', "file or directory name")
        return unique_part, None
    raise ValueError("Unrecognized line %s" % line)

269 

270 

def read_subtree_line(line: str) -> Tuple[FileTree, List[Optional[str]]]:
    """
    Read the sub-tree referred to by a ``->`` line.

    The line names the sub-tree, optionally followed by comma-separated
    ``<placeholder>=<value>`` pairs and by the short name(s) between brackets.
    The sub-tree is loaded using ``FileTree.read`` with the pairs as keyword arguments.

    :param line: input line from a ``*.tree`` file
    :return: Tuple with

        - sub_tree
        - list of short names of the sub_tree (``[None]`` if not provided)
    :raises ValueError: if the line can not be parsed or contains forbidden characters
    """
    short_names: List[Optional[str]]
    with_short_name = re.match(r"^(\s*)->\s*(\S*)(.*)\((\S*)\)", line)
    if with_short_name is not None:
        _, type_name, variables_str, full_short_name = with_short_name.groups()
        check_forbidden_characters(full_short_name, r"(){}/", "sub-tree name")
        if "," in full_short_name:
            candidates = (part.strip() for part in full_short_name.split(","))
            short_names = [part for part in candidates if len(part) > 0]
        else:
            short_names = [full_short_name]
    else:
        # No short name between brackets; only accept the bare sub-tree form.
        without_short_name = re.match(r"^(\s*)->\s*(\S*)(.*)", line)
        if without_short_name is None:
            raise ValueError(
                "Sub-tree line could not be parsed: {}".format(line.strip())
            )
        _, type_name, variables_str = without_short_name.groups()
        short_names = [None]

    check_forbidden_characters(type_name, r'<>:"/\|?*', "filename of sub-tree")

    variables = {}
    if variables_str.strip():
        assignments = (item.split("=") for item in variables_str.split(","))
        variables = {key.strip(): value.strip() for key, value in assignments}

    return FileTree.read(type_name, **variables), short_names