Coverage for pymend\docstring_parser\epydoc.py: 99%
153 statements
« prev ^ index » next coverage.py v7.3.2, created at 2024-04-20 19:09 +0200
« prev ^ index » next coverage.py v7.3.2, created at 2024-04-20 19:09 +0200
"""Epydoc-style docstring parsing.
3.. seealso:: http://epydoc.sourceforge.net/manual-fields.html
4"""
6import inspect
7import re
8from typing import NamedTuple, Optional
10from .common import (
11 Docstring,
12 DocstringMeta,
13 DocstringParam,
14 DocstringRaises,
15 DocstringReturns,
16 DocstringStyle,
17 DocstringYields,
18 ParseError,
19 RenderingStyle,
20 append_description,
21 clean_str,
22 split_description,
23)
class SectionPattern(NamedTuple):
    """Patterns for docstring sections."""

    # Compiled regexes, one per epydoc field kind (built in `parse`):
    param: re.Pattern[str]  # matches @param/@keyword/@type fields
    raises: re.Pattern[str]  # matches @raise fields
    returns: re.Pattern[str]  # matches @return/@rtype/@yield/@ytype fields
    meta: re.Pattern[str]  # matches any other @<name> field
class SectionMatch(NamedTuple):
    """Matches of docstring sections."""

    # One entry per SectionPattern field; None when that pattern did not match.
    param: Optional[re.Match[str]]
    raises: Optional[re.Match[str]]
    returns: Optional[re.Match[str]]
    meta: Optional[re.Match[str]]
def _get_matches_for_chunk(chunk: str, patterns: SectionPattern) -> SectionMatch:
    """Run every section pattern against one ``@``-field chunk.

    Parameters
    ----------
    chunk : str
        Chunk to match the patterns against.
    patterns : SectionPattern
        Collection of regex patterns to match against the chunk.

    Returns
    -------
    SectionMatch
        Tuple of matches of the patterns against the chunk
        (entries are ``None`` where a pattern did not match).
    """
    return SectionMatch(
        param=patterns.param.search(chunk),
        raises=patterns.raises.search(chunk),
        returns=patterns.returns.search(chunk),
        meta=patterns.meta.search(chunk),
    )
class StreamToken(NamedTuple):
    """One entry of the stream list."""

    # Section kind: 'param', 'raise', 'return', 'yield' or 'meta'
    # (values assigned in `_tokenize`).
    base: str
    # The epydoc keyword that introduced the field (e.g. 'param', 'type').
    key: str
    # Extra arguments of the field (e.g. the parameter name); may be empty.
    args: list[str]
    # Free-text description following the field marker.
    desc: str
def _tokenize(
    meta_chunk: str,
    patterns: SectionPattern,
) -> list[StreamToken]:
    """Return the tokenized stream according to the regex patterns.

    Parameters
    ----------
    meta_chunk : str
        Chunk to tokenize.
    patterns : SectionPattern
        Collection of patterns for different sections.

    Returns
    -------
    list[StreamToken]
        (base, key, args, desc)
        base: Literal['param', 'raise', 'return', 'yield', 'meta']
        key: str: the epydoc keyword that introduced the field
        args: list[str]: extra arguments of the field
        desc: str: Description

    Raises
    ------
    ParseError
        If none of the patterns match against the chunk.
    ParseError
        If we match a section in the general meta case that should have already
        been matched in a specific section.
    """
    stream: list[StreamToken] = []
    # A chunk starts at a line beginning with '@' and runs until the next
    # such line (or the end of the string).
    for chunk_match in re.finditer(r"(^@.*?)(?=^@|\Z)", meta_chunk, flags=re.S | re.M):
        chunk = chunk_match.group(0)
        if not chunk:  # defensive guard; finditer never yields empty chunks here
            continue

        matches = _get_matches_for_chunk(chunk, patterns)

        match = matches.param or matches.raises or matches.returns or matches.meta
        if not match:
            msg = f'Error parsing meta information near "{chunk}".'
            raise ParseError(msg)

        # Every pattern captures the introducing keyword in group(1), so the
        # key can be extracted once instead of separately in each branch.
        key: str = match.group(1)
        if matches.param:
            base = "param"
            args = [match.group(2).strip()]
        elif matches.raises:
            base = "raise"
            # The exception type is optional for @raise.
            args = [] if match.group(2) is None else [match.group(2).strip()]
        elif matches.returns:
            base = "return" if match.group(1) in ("return", "rtype") else "yield"
            args = []
        else:
            base = "meta"
            token = clean_str(match.group(2).strip())
            args = [] if token is None else re.split(r"\s+", token)

            # Make sure we didn't match some existing keyword in an incorrect
            # way here:
            if key in {
                "param",
                "keyword",
                "type",
                "return",
                "rtype",
                "yield",
                "ytype",
            }:
                msg = f'Error parsing meta information near "{chunk}".'
                raise ParseError(msg)

        desc = chunk[match.end() :].strip()
        if "\n" in desc:
            # Keep the first line untouched, normalize the indentation of the rest.
            first_line, rest = desc.split("\n", 1)
            desc = first_line + "\n" + inspect.cleandoc(rest)
        stream.append(StreamToken(base, key, args, desc))
    return stream
159def _combine_params(stream: list[StreamToken]) -> dict[str, dict[str, Optional[str]]]:
160 """Group the list of tokens into sections based on section and information..
162 Parameters
163 ----------
164 stream : list[StreamToken]
165 List of tokens to group into dict.
167 Returns
168 -------
169 dict[str, dict[str, Optional[str]]]
170 Dictionary grouping parsed param sections
171 by section (param name, "return", "yield") and
172 information they represent (type_name, description)
173 """
174 params: dict[str, dict[str, Optional[str]]] = {}
175 for base, key, args, desc in stream:
176 if base not in ["param", "return", "yield"]:
177 continue # nothing to do
178 arg_name = args[0] if base == "param" else base
179 info = params.setdefault(arg_name, {})
180 info_key = "type_name" if "type" in key else "description"
181 info[info_key] = desc
182 return params
def _add_meta_information(
    stream: list[StreamToken],
    params: dict[str, dict[str, Optional[str]]],
    ret: Docstring,
) -> None:
    """Add the meta information into the docstring instance.

    Parameters
    ----------
    stream : list[StreamToken]
        Stream of tokens of the string.
    params : dict[str, dict[str, Optional[str]]]
        Grouped information about each section.
    ret : Docstring
        Docstring instance to add the information to.

    Raises
    ------
    ParseError
        If an unexpected section is encountered.
    """
    # Tracks which param names / 'return' / 'yield' have already produced a
    # combined meta item, so each section is appended only once even though
    # the stream usually has two tokens per section (description + type).
    is_done: dict[str, bool] = {}
    for token in stream:
        if token.base == "param" and not is_done.get(token.args[0], False):
            (arg_name,) = token.args
            info = params[arg_name]
            type_name = info.get("type_name")

            # A trailing '?' on the type marks the parameter as optional.
            if type_name and type_name.endswith("?"):
                is_optional = True
                type_name = type_name[:-1]
            else:
                is_optional = False

            # Pull a default value out of a "... defaults to X" description.
            match = re.match(r".*defaults to (.+)", token.desc, flags=re.DOTALL)
            default = match[1].rstrip(".") if match else None

            meta_item = DocstringParam(
                args=[token.key, arg_name],
                description=info.get("description"),
                arg_name=arg_name,
                type_name=type_name,
                is_optional=is_optional,
                default=default,
            )
            is_done[arg_name] = True
        elif token.base == "return" and not is_done.get("return", False):
            info = params["return"]
            meta_item = DocstringReturns(
                args=[token.key],
                description=info.get("description"),
                type_name=info.get("type_name"),
                is_generator=False,
            )
            is_done["return"] = True
        elif token.base == "yield" and not is_done.get("yield", False):
            info = params["yield"]
            meta_item = DocstringYields(
                args=[token.key],
                description=info.get("description"),
                type_name=info.get("type_name"),
                is_generator=True,
            )
            is_done["yield"] = True
        elif token.base == "raise":
            # @raise may omit the exception type, hence the (None,) fallback.
            (type_name,) = token.args or (None,)
            meta_item = DocstringRaises(
                args=[token.key, *token.args],
                description=token.desc,
                type_name=type_name,
            )
        elif token.base == "meta":
            meta_item = DocstringMeta(
                args=[token.key, *token.args],
                description=token.desc,
            )
        else:
            # Second token of an already-handled section: nothing to append,
            # but an un-handled name reaching here is a parse bug.
            arg_key = token.args[0] if token.args else token.base
            if not is_done.get(arg_key, False):
                msg = (
                    "Error building meta information. "
                    f"Encountered unexpected section {arg_key}."
                )
                raise ParseError(msg)
            continue  # don't append

        ret.meta.append(meta_item)
def parse(text: Optional[str]) -> Docstring:
    """Parse the epydoc-style docstring into its components.

    Parameters
    ----------
    text : Optional[str]
        docstring to parse

    Returns
    -------
    Docstring
        parsed docstring
    """
    ret = Docstring(style=DocstringStyle.EPYDOC)
    if not text:
        return ret

    text = inspect.cleandoc(text)
    # Everything before the first line starting with '@' is free description;
    # the remainder holds the @-field meta information.
    first_field = re.search("^@", text, flags=re.M)
    if first_field is None:
        desc_chunk = text
        meta_chunk = ""
    else:
        desc_chunk = text[: first_field.start()]
        meta_chunk = text[first_field.start() :]

    split_description(ret, desc_chunk)

    patterns = SectionPattern(
        param=re.compile(r"(param|keyword|type)(\s+[_A-z][_A-z0-9]*\??):"),
        raises=re.compile(r"(raise)(\s+[_A-z][_A-z0-9]*\??)?:"),
        returns=re.compile(r"(return|rtype|yield|ytype):"),
        meta=re.compile(r"([_A-z][_A-z0-9]+)((\s+[_A-z][_A-z0-9]*\??)*):"),
    )

    # tokenize, then combine type_name, arg_name, and description information
    token_stream = _tokenize(meta_chunk, patterns)
    combined = _combine_params(token_stream)
    _add_meta_information(token_stream, combined, ret)

    return ret
def compose(
    docstring: Docstring,
    rendering_style: RenderingStyle = RenderingStyle.COMPACT,
    indent: str = "    ",
) -> str:
    """Render a parsed docstring into docstring text.

    Parameters
    ----------
    docstring : Docstring
        parsed docstring representation
    rendering_style : RenderingStyle
        the style to render docstrings (Default value = RenderingStyle.COMPACT)
    indent : str
        the characters used as indentation in the
        docstring string (Default value = '    ')

    Returns
    -------
    str
        docstring text
    """

    def render_desc(desc: Optional[str], *, is_type: bool) -> str:
        """Render a description (or type) for placement after a field marker.

        Parameters
        ----------
        desc : Optional[str]
            Description to process
        is_type : bool
            Whether the description represents type information.

        Returns
        -------
        str
            The properly rendered description information.
        """
        if not desc:
            return ""

        first, *rest = desc.splitlines()
        # EXPANDED (and CLEAN, for non-type text) puts the description on its
        # own indented line; otherwise it follows the marker after a space.
        on_new_line = rendering_style == RenderingStyle.EXPANDED or (
            rendering_style == RenderingStyle.CLEAN and not is_type
        )
        head = "\n" + indent + first if on_new_line else f" {first}"
        return "\n".join([head, *(indent + line for line in rest)])

    parts: list[str] = []
    append_description(docstring, parts)

    for meta in docstring.meta:
        if isinstance(meta, DocstringParam):
            if meta.type_name:
                # Optional parameters get their '?' suffix restored.
                shown_type = (
                    f"{meta.type_name}?" if meta.is_optional else meta.type_name
                )
                parts.append(
                    f"@type {meta.arg_name}:" + render_desc(shown_type, is_type=True)
                )
            parts.append(
                f"@param {meta.arg_name}:"
                + render_desc(meta.description, is_type=False)
            )
        elif isinstance(meta, (DocstringReturns, DocstringYields)):
            if isinstance(meta, DocstringYields):
                arg_key, type_key = "yield", "ytype"
            else:
                arg_key, type_key = "return", "rtype"
            if meta.type_name:
                parts.append(
                    f"@{type_key}:" + render_desc(meta.type_name, is_type=True)
                )
            if meta.description:
                parts.append(
                    f"@{arg_key}:" + render_desc(meta.description, is_type=False)
                )
        elif isinstance(meta, DocstringRaises):
            prefix = f"@raise {meta.type_name}:" if meta.type_name else "@raise:"
            parts.append(prefix + render_desc(meta.description, is_type=False))
        else:
            parts.append(
                f'@{" ".join(meta.args)}:'
                + render_desc(meta.description, is_type=False)
            )
    return "\n".join(parts)