Coverage for src/m6rclib/metaphor_parser.py: 100%
216 statements
« prev ^ index » next coverage.py v7.6.1, created at 2024-11-13 18:02 +0000
« prev ^ index » next coverage.py v7.6.1, created at 2024-11-13 18:02 +0000
1# Copyright 2024 M6R Ltd.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
15import glob
16import os
17from pathlib import Path
19from typing import List, Set, Optional, Union
21from .metaphor_token import Token, TokenType
22from .embed_lexer import EmbedLexer
23from .metaphor_lexer import MetaphorLexer
24from .metaphor_ast_node import MetaphorASTNode, MetaphorASTNodeType
class MetaphorParserFileAlreadyUsedError(Exception):
    """
    Signal that a file was requested again after it had already been used.

    Attributes:
        filename (str): The file name that was requested a second time.
        token (Token): The token that was current when the duplicate was detected.
    """

    def __init__(self, filename: str, token: Token) -> None:
        # Keep the offending name and token so a handler can report a location.
        self.filename: str = filename
        self.token: Token = token

        description = f"The file '{filename}' has already been used."
        super().__init__(description)
class MetaphorParserSyntaxError(Exception):
    """
    Describe a single syntax error found while parsing.

    The exception text combines the message with the file name and position;
    the individual pieces are also kept as attributes.

    Attributes:
        message (str): Description of the problem.
        filename (str): Name of the file in which the problem was found.
        line (int): Line number of the problem.
        column (int): Column number of the problem.
        input_text (str): The source text associated with the problem.
    """

    def __init__(self, message: str, filename: str, line: int, column: int, input_text: str) -> None:
        # The rendered text previously ended with a dangling ", " separator;
        # nothing follows the column, so the separator is dropped.
        super().__init__(f"{message}: file: {filename}, line {line}, column {column}")
        self.message: str = message
        self.filename: str = filename
        self.line: int = line
        self.column: int = column
        self.input_text: str = input_text
class MetaphorParserError(Exception):
    """
    Wrap the syntax errors collected during a parse in a single exception.

    Attributes:
        errors (List[MetaphorParserSyntaxError]): The individual errors being reported.
    """

    def __init__(self, message: str, errors: List[MetaphorParserSyntaxError]) -> None:
        # The list is stored as given (not copied).
        self.errors: List[MetaphorParserSyntaxError] = errors
        super().__init__(message)
class MetaphorParser:
    """
    Parser class to process tokens and build an Abstract Syntax Tree (AST).

    Attributes:
        syntax_tree (MetaphorASTNode): The root node of the AST.
        parse_errors (List[MetaphorParserSyntaxError]): Syntax errors recorded so far.
        lexers (List[Union[MetaphorLexer, EmbedLexer]]): Stack of lexers; the last entry is
            the one currently being read.
        previously_seen_files (Set[str]): Canonical (realpath) names of files already loaded.
        search_paths (List[str]): Paths searched for relative 'Include' file names.
        current_token (Optional[Token]): The token most recently read from a lexer.
    """

    def __init__(self) -> None:
        self.syntax_tree: MetaphorASTNode = MetaphorASTNode(MetaphorASTNodeType.ROOT, "")
        self.parse_errors: List[MetaphorParserSyntaxError] = []
        self.lexers: List[Union[MetaphorLexer, EmbedLexer]] = []
        self.previously_seen_files: Set[str] = set()
        self.search_paths: List[str] = []
        self.current_token: Optional[Token] = None

    def _insert_preamble_text(self, text: str) -> None:
        """Attach one line of preamble text to the root of the syntax tree."""
        self.syntax_tree.attach_child(MetaphorASTNode(MetaphorASTNodeType.TEXT, text))

    @staticmethod
    def _preamble_lines() -> List[str]:
        """Return the fixed explanatory preamble, one list entry per output line."""
        return [
            "The following is written in a language called Metaphor.",
            "",
            "Metaphor has the structure of a document tree with branches and leaves being prefixed",
            "by the keywords \"Role:\", \"Context:\" or \"Action:\". Each of these indicates the",
            "start of a new block of information.",
            "",
            "Blocks have an optional section name that will immediately follow them on the same line.",
            "If this is missing then the section name is not defined.",
            "",
            "After a keyword line there may be one or more lines of text that will describe the purpose",
            "of that block. A block may also include one or more optional child blocks inside them and",
            "that further clarify their parent block.",
            "",
            "The indentation of the blocks indicates where in the tree the pieces appear. For example a",
            "\"Context:\" indented by 8 spaces is a child of the context above it that is indented by 4",
            "spaces. One indented 12 spaces would be a child of the block above it that is indented by",
            "8 spaces.",
            "",
            "If you are presented with code or document fragments inside a block delimited by 3",
            "backticks then please pay close attention to the indentation level of the opening set of",
            "backticks. Please remove this amount of whitespace from the start of each line of the",
            "enclosed text. In the following example, even though \"text line 1\" is indented by",
            "4 spaces, you should remove these 4 spaces because the backticks are also indented by",
            "4 spaces. You should also remove 4 spaces from \"text line 2\" because of this",
            "backtick indentation, but leave the remaining 2 spaces:",
            # The example must really carry the indentation the text above
            # describes: fences and "text line 1" at 4 spaces, "text line 2" at 6.
            "    ```plaintext",
            "    text line 1",
            "      text line 2",
            # The comma after the closing fence matters: without it Python joins
            # this literal with the following "" and the blank line is lost.
            "    ```",
            "",
            "If a \"Role:\" block exists then this is the role you should fulfil.",
            "",
            "\"Context:\" blocks provide context necessary to understand what you will be asked to do.",
            "",
            "An \"Action:\" block describes the task I would like you to do.",
            "",
            "When you process the actions please carefully ensure you do all of them accurately. These",
            "need to fulfil all the details described in the \"Context:\". Ensure you complete all the",
            "elements and do not include any placeholders.",
            "",
        ]

    def _generate_preamble(self) -> None:
        """Attach every preamble line to the root node as a TEXT child."""
        for text in self._preamble_lines():
            self._insert_preamble_text(text)

    def parse(self, input_text: str, filename: str, search_paths: List[str]) -> MetaphorASTNode:
        """
        Parse an input string and construct the AST.

        Args:
            input_text (str): The text to be parsed.
            filename (str): The name of the file being parsed.
            search_paths (List[str]): List of paths to search for included files.

        Returns:
            MetaphorASTNode: The root node of the AST, with the preamble text and the
                parsed top-level blocks attached as children.

        Raises:
            MetaphorParserError: If any syntax errors were recorded, if a file needed by
                the input could not be read, or if a file was included more than once.
        """
        self.search_paths = search_paths

        try:
            self.lexers.append(MetaphorLexer(input_text, filename))
            self._generate_preamble()

            seen_action_tree: bool = False
            seen_context_tree: bool = False
            seen_role_tree: bool = False

            while True:
                token = self.get_next_token()
                if token.type == TokenType.ACTION:
                    # A duplicate is reported but still parsed, so that any
                    # errors inside it are collected as well.
                    if seen_action_tree:
                        self._record_syntax_error(token, "'Action' already defined")

                    self.syntax_tree.attach_child(self._parse_action(token))
                    seen_action_tree = True
                elif token.type == TokenType.CONTEXT:
                    if seen_context_tree:
                        self._record_syntax_error(token, "'Context' already defined")

                    self.syntax_tree.attach_child(self._parse_context(token))
                    seen_context_tree = True
                elif token.type == TokenType.ROLE:
                    if seen_role_tree:
                        self._record_syntax_error(token, "'Role' already defined")

                    self.syntax_tree.attach_child(self._parse_role(token))
                    seen_role_tree = True
                elif token.type == TokenType.END_OF_FILE:
                    if self.parse_errors:
                        raise MetaphorParserError("parser error", self.parse_errors)

                    return self.syntax_tree
                else:
                    self._record_syntax_error(token, f"Unexpected token: {token.value} at top level")
        except FileNotFoundError as e:
            # Report the failure at the position of the most recently read token.
            err_token = self.current_token
            self.parse_errors.append(MetaphorParserSyntaxError(
                f"{e}", err_token.filename, err_token.line, err_token.column, err_token.input
            ))
            raise MetaphorParserError("parser error", self.parse_errors) from e
        except MetaphorParserFileAlreadyUsedError as e:
            self.parse_errors.append(MetaphorParserSyntaxError(
                f"The file '{e.filename}' has already been used",
                e.token.filename,
                e.token.line,
                e.token.column,
                e.token.input
            ))
            raise MetaphorParserError("parser error", self.parse_errors) from e

    def parse_file(self, filename: str, search_paths: List[str]) -> MetaphorASTNode:
        """
        Parse a file and construct the AST.

        Args:
            filename (str): The path to the file to be parsed.
            search_paths (List[str]): List of paths to search for included files.

        Returns:
            MetaphorASTNode: The root node of the AST.

        Raises:
            MetaphorParserError: If the file cannot be read or there are syntax errors
                during parsing.
            MetaphorParserFileAlreadyUsedError: If this parser has already loaded
                `filename`; this check runs before parsing starts and is not wrapped.
        """
        try:
            self._check_file_not_loaded(filename)
            input_text = self._read_file(filename)
            return self.parse(input_text, filename, search_paths)
        except FileNotFoundError as e:
            # No token exists yet, so the error carries an empty location.
            self.parse_errors.append(MetaphorParserSyntaxError(
                f"{e}", "", 0, 0, ""
            ))
            raise MetaphorParserError("parser error", self.parse_errors) from e
        except MetaphorParserError as e:
            raise MetaphorParserError("parser error", self.parse_errors) from e

    def get_next_token(self) -> Token:
        """
        Get the next token from the active lexer.

        'Include' and 'Embed' tokens are handled here rather than returned: they are
        processed (which may push new lexers onto the stack) and reading continues.
        When a lexer reports end-of-file it is popped and reading resumes with the
        lexer beneath it. Once the stack is empty an END_OF_FILE token is returned.

        Returns:
            Token: The next token that is not an Include, Embed or per-lexer END_OF_FILE.
        """
        while self.lexers:
            lexer = self.lexers[-1]
            token = lexer.get_next_token()
            self.current_token = token

            if token.type == TokenType.INCLUDE:
                self._parse_include()
            elif token.type == TokenType.EMBED:
                self._parse_embed()
            elif token.type == TokenType.END_OF_FILE:
                self.lexers.pop()
            else:
                return token

        return Token(TokenType.END_OF_FILE, "", "", "", 0, 0)

    def _record_syntax_error(self, token: Token, message: str) -> None:
        """Record a syntax error at the token's position; parsing continues (nothing is raised)."""
        error = MetaphorParserSyntaxError(
            message, token.filename, token.line, token.column, token.input
        )
        self.parse_errors.append(error)

    def _find_file_path(self, filename: str) -> str:
        """
        Try to find a valid path for a file, given all the search path options.

        Args:
            filename (str): The file name as written in the source.

        Returns:
            str: `filename` itself if it exists, otherwise the first existing
                combination of a search path and `filename`.

        Raises:
            FileNotFoundError: If no existing path could be found.
        """
        if Path(filename).exists():
            return filename

        # If we don't have an absolute path then we can try search paths.
        if not os.path.isabs(filename):
            for path in self.search_paths:
                try_name = os.path.join(path, filename)
                if Path(try_name).exists():
                    return try_name

        raise FileNotFoundError(f"File not found: {filename}")

    def _read_file(self, filename: str) -> str:
        """
        Read file content into memory.

        Every OS-level failure is re-raised as FileNotFoundError with a descriptive
        message, so callers only need to handle a single exception type.

        Args:
            filename (str): Path of the file to read (decoded as UTF-8).

        Returns:
            str: The complete text of the file.

        Raises:
            FileNotFoundError: If the file cannot be opened or read.
        """
        try:
            with open(filename, 'r', encoding='utf-8') as file:
                return file.read()
        except FileNotFoundError as e:
            raise FileNotFoundError(f"File not found: {filename}") from e
        except PermissionError as e:
            raise FileNotFoundError(f"You do not have permission to access: {filename}") from e
        except IsADirectoryError as e:
            raise FileNotFoundError(f"Is a directory: {filename}") from e
        except OSError as e:
            # Must stay last: the handlers above are all OSError subclasses.
            raise FileNotFoundError(f"OS error: {e}") from e

    def _check_file_not_loaded(self, filename: str) -> None:
        """
        Check we have not already loaded a file, and remember it if not.

        Args:
            filename (str): The file name to check; compared by its realpath.

        Raises:
            MetaphorParserFileAlreadyUsedError: If the file was already loaded.
        """
        canonical_filename = os.path.realpath(filename)
        if canonical_filename in self.previously_seen_files:
            raise MetaphorParserFileAlreadyUsedError(filename, self.current_token)

        self.previously_seen_files.add(canonical_filename)

    def _parse_text(self, token: Token) -> MetaphorASTNode:
        """Parse a text block."""
        return MetaphorASTNode(MetaphorASTNodeType.TEXT, token.value)

    def _parse_block_label(self, token: Token, block_name: str) -> str:
        """
        Consume the optional label and the indent that open a keyword block.

        Args:
            token (Token): The keyword token that started the block; errors are
                reported at its position.
            block_name (str): Block name used in error messages ('Action', 'Context'
                or 'Role').

        Returns:
            str: The label text, or "" if the block has no label.
        """
        label_name = ""

        init_token = self.get_next_token()
        if init_token.type == TokenType.KEYWORD_TEXT:
            label_name = init_token.value
            indent_token = self.get_next_token()
            if indent_token.type != TokenType.INDENT:
                self._record_syntax_error(
                    token,
                    f"Expected indent after keyword description for '{block_name}' block"
                )
        elif init_token.type != TokenType.INDENT:
            self._record_syntax_error(
                token,
                f"Expected description or indent for '{block_name}' block"
            )

        return label_name

    def _parse_text_block_body(self, node: MetaphorASTNode, block_name: str) -> MetaphorASTNode:
        """
        Parse the body of a block that may only contain text lines.

        Args:
            node (MetaphorASTNode): The block node that receives the text children.
            block_name (str): Block name used in error messages.

        Returns:
            MetaphorASTNode: `node`, once an outdent or end-of-file is reached.
        """
        while True:
            token = self.get_next_token()
            if token.type == TokenType.TEXT:
                node.attach_child(self._parse_text(token))
            elif token.type in (TokenType.OUTDENT, TokenType.END_OF_FILE):
                return node
            else:
                self._record_syntax_error(
                    token,
                    f"Unexpected token: {token.value} in '{block_name}' block"
                )

    def _parse_action(self, token: Token) -> MetaphorASTNode:
        """Parse an action block and construct its AST node."""
        label_name = self._parse_block_label(token, "Action")
        action_node = MetaphorASTNode(MetaphorASTNodeType.ACTION, label_name)
        return self._parse_text_block_body(action_node, "Action")

    def _parse_context(self, token: Token) -> MetaphorASTNode:
        """
        Parse a Context block.

        A Context block may hold text lines followed by nested Context blocks. Text
        that appears after a nested Context is reported as an error but still attached.
        """
        label_name = self._parse_block_label(token, "Context")
        context_node = MetaphorASTNode(MetaphorASTNodeType.CONTEXT, label_name)

        seen_child_context = False

        while True:
            token = self.get_next_token()
            if token.type == TokenType.TEXT:
                if seen_child_context:
                    self._record_syntax_error(token, "Text must come first in a 'Context' block")

                context_node.attach_child(self._parse_text(token))
            elif token.type == TokenType.CONTEXT:
                context_node.attach_child(self._parse_context(token))
                seen_child_context = True
            elif token.type in (TokenType.OUTDENT, TokenType.END_OF_FILE):
                return context_node
            else:
                self._record_syntax_error(token, f"Unexpected token: {token.value} in 'Context' block")

    def _parse_role(self, token: Token) -> MetaphorASTNode:
        """Parse a Role block."""
        label_name = self._parse_block_label(token, "Role")
        role_node = MetaphorASTNode(MetaphorASTNodeType.ROLE, label_name)
        return self._parse_text_block_body(role_node, "Role")

    def _parse_include(self) -> None:
        """
        Parse an Include block and load the included file.

        On success a new MetaphorLexer for the included file is pushed onto the lexer
        stack, so the next tokens come from that file.

        Raises:
            MetaphorParserFileAlreadyUsedError: If the file was already loaded.
            FileNotFoundError: If the file cannot be found or read.
        """
        token_next = self.get_next_token()
        if token_next.type != TokenType.KEYWORD_TEXT:
            self._record_syntax_error(token_next, "Expected file name for 'Include'")
            return

        filename = token_next.value
        self._check_file_not_loaded(filename)
        try_file = self._find_file_path(filename)
        input_text = self._read_file(try_file)
        self.lexers.append(MetaphorLexer(input_text, try_file))

    def _parse_embed(self) -> None:
        """
        Parse an Embed block and load the embedded file(s).

        The name is treated as a glob pattern. One EmbedLexer per matching file is
        pushed onto the lexer stack, so the last match pushed is the first one read.

        Raises:
            FileNotFoundError: If a matching path cannot be read.
        """
        token_next = self.get_next_token()
        if token_next.type != TokenType.KEYWORD_TEXT:
            self._record_syntax_error(token_next, "Expected file name or wildcard match for 'Embed'")
            return

        match = token_next.value
        # Recursive globbing is only enabled when the pattern asks for it.
        recurse = "**/" in match

        files = glob.glob(match, recursive=recurse)
        if not files:
            self._record_syntax_error(token_next, f"{match} does not match any files for 'Embed'")
            return

        for file in files:
            input_text = self._read_file(file)
            self.lexers.append(EmbedLexer(input_text, file))