Coverage for pymend\docstring_parser\google.py: 96%
216 statements
« prev ^ index » next coverage.py v7.3.2, created at 2024-04-20 19:09 +0200
« prev ^ index » next coverage.py v7.3.2, created at 2024-04-20 19:09 +0200
1"""Google-style docstring parsing."""
3import inspect
4import re
5from collections import OrderedDict
6from collections.abc import Mapping, Sequence
7from enum import IntEnum
8from typing import NamedTuple, Optional
10from .common import (
11 EXAMPLES_KEYWORDS,
12 PARAM_KEYWORDS,
13 RAISES_KEYWORDS,
14 RETURNS_KEYWORDS,
15 YIELDS_KEYWORDS,
16 Docstring,
17 DocstringExample,
18 DocstringMeta,
19 DocstringParam,
20 DocstringRaises,
21 DocstringReturns,
22 DocstringStyle,
23 DocstringYields,
24 MainSections,
25 ParseError,
26 RenderingStyle,
27 append_description,
28 split_description,
29)
class SectionType(IntEnum):
    """Classify how many entries a docstring section may contain.

    The parser uses this to decide whether a section body is treated as one
    block of text, as a list of ``name: description`` entries, or as either.
    """

    #: Exactly one free-form entry, for sections like examples.
    SINGULAR = 0

    #: A list of entries, for sections like params.
    MULTIPLE = 1

    #: Either of the above, for sections like returns or yields.
    SINGULAR_OR_MULTIPLE = 2
class Section(NamedTuple):
    """Describe one recognised docstring section.

    Being a named tuple, an instance can be built positionally as
    ``Section(title, key, type_info)`` and unpacked the same way.
    """

    # Heading text as it appears in the docstring, without a trailing colon.
    title: str
    # Keyword placed first in ``args`` of the metadata built for this section.
    key: str
    # Whether the section holds a single entry, several entries, or either.
    type_info: SectionType
# Splits "name (type)" into its name (group 1) and type (group 2).
GOOGLE_TYPED_ARG_REGEX = re.compile(r"\s*(.+?)\s*\(\s*(.*[^\s]+)\s*\)")
# Captures the default value from a description of the form "... Defaults to X."
GOOGLE_ARG_DESC_REGEX = re.compile(r".*\. Defaults to (.+)\.")
# Recognises the start of a "spec: description" entry. The alternatives are
# tried in the order listed and joined with "|".
MULTIPLE_PATTERN = re.compile(
    "|".join(
        (
            # Optional leading whitespace, then one contiguous run of
            # non-whitespace, non-colon characters followed by a colon.
            #   somecontiguoustype: some description
            r"(\s*[^:\s]+:)",
            # Some contiguous text, then something in parentheses that is
            # immediately followed by a colon.
            #   name (type): some description
            r"(\s*[^:\s]+\s+\(.+\):)",
            # Whitespace is allowed when a closing ] comes directly before the
            # colon.
            #   list[int, int]: some description
            r"([^:]*\]:.*)",
            # Pipe-separated type unions such as "int | str". Each individual
            # type may contain spaces as long as it neither starts nor ends
            # with one: [^\s|][^\|]*[^\s|]
            r"(\s*[^\s|][^\|]*[^\s|](\s*\|\s*[^\s|][^\|]*[^\s|])+:)",
        )
    )
)
# Sections recognised when no custom list is supplied. Each row groups the
# accepted heading spellings that share one key and one section type; the
# comprehension expands the rows into individual ``Section`` entries in the
# order written here.
DEFAULT_SECTIONS = [
    Section(title, key, type_info)
    for key, type_info, titles in (
        (
            "param",
            SectionType.MULTIPLE,
            ("Arguments", "Args", "Parameters", "Params"),
        ),
        ("raises", SectionType.MULTIPLE, ("Raises", "Exceptions", "Except")),
        ("attribute", SectionType.MULTIPLE, ("Attributes",)),
        ("examples", SectionType.SINGULAR, ("Example", "Examples")),
        ("returns", SectionType.SINGULAR_OR_MULTIPLE, ("Returns",)),
        ("yields", SectionType.SINGULAR_OR_MULTIPLE, ("Yields",)),
    )
    for title in titles
]
class GoogleParser:
    """Parser for Google-style docstrings.

    The parser splits a docstring into a leading description and a series of
    titled sections, then turns every section into one or more docstring
    metadata objects according to the section's ``SectionType``.
    """

    def __init__(
        self, sections: Optional[list[Section]] = None, *, title_colon: bool = True
    ) -> None:
        """Set up sections.

        Parameters
        ----------
        sections : Optional[list[Section]]
            Recognized sections. ``None`` or an empty list selects
            ``DEFAULT_SECTIONS``.
        title_colon : bool
            Require colon after section title. (Default value = True)
        """
        if not sections:
            sections = DEFAULT_SECTIONS
        self.sections = {s.title: s for s in sections}
        self.title_colon = title_colon
        self._setup()

    def _setup(self) -> None:
        """Compile the regex that finds section titles.

        A title has to stand alone on its line, optionally followed by a colon
        (depending on ``title_colon``) and trailing horizontal whitespace.
        """
        colon = ":" if self.title_colon else ""
        # Titles are literal text, not patterns: escape them so that custom
        # titles containing regex metacharacters (e.g. "C++ Notes") still
        # compile and match exactly what was registered.
        alternatives = "|".join(f"({re.escape(title)})" for title in self.sections)
        self.titles_re = re.compile(
            "^(" + alternatives + ")" + colon + "[ \t\r\f\v]*$",
            flags=re.M,
        )

    @staticmethod
    def _build_single_meta(section: Section, desc: str) -> DocstringMeta:
        """Build docstring element for single line sections.

        Parameters
        ----------
        section : Section
            The section that is being processed.
        desc : str
            docstring element text

        Returns
        -------
        DocstringMeta
            Docstring meta wrapper.

        Raises
        ------
        ParseError
            If the section represents a parameter section.
            In that case we would not expect to be in the single line function.
        """
        if section.key in RETURNS_KEYWORDS:
            return DocstringReturns(
                args=[section.key],
                description=desc,
                type_name=None,
                is_generator=False,
            )
        if section.key in YIELDS_KEYWORDS:
            return DocstringYields(
                args=[section.key],
                description=desc,
                type_name=None,
                is_generator=True,
            )
        if section.key in RAISES_KEYWORDS:
            return DocstringRaises(args=[section.key], description=desc, type_name=None)
        if section.key in EXAMPLES_KEYWORDS:
            return DocstringExample(args=[section.key], snippet=None, description=desc)
        if section.key in PARAM_KEYWORDS:
            msg = "Expected parameter name."
            raise ParseError(msg)
        return DocstringMeta(args=[section.key], description=desc)

    def _prepare_multi_meta(self, section: Section, text: str) -> tuple[str, str]:
        """Check text for consistency and split into before and desc.

        Parameters
        ----------
        section : Section
            The section that is being processed.
        text : str
            docstring element text

        Returns
        -------
        before : str
            The part before the colon.
        desc : str
            The description of the element.

        Raises
        ------
        ParseError
            If the text did not match the multi pattern regex.
        ParseError
            If there is no colon in the text.
        """
        if not MULTIPLE_PATTERN.match(text):
            msg = (
                "Could not match multi pattern to split "
                f"chunk part {text!r} for section {section.title}."
            )
            raise ParseError(msg)
        # Defensive: every alternative of MULTIPLE_PATTERN requires a colon,
        # so this is not expected to trigger after a successful match.
        if ":" not in text:
            msg = f"Expected a colon in {text!r} for title {section.title}."
            raise ParseError(msg)

        # Split spec and description at the first colon.
        before, desc = text.split(":", 1)
        if desc:
            # Drop the single separating space after the colon, if present.
            if desc.startswith(" "):
                desc = desc[1:]
            if "\n" in desc:
                # Keep the first line as is and dedent the continuation lines.
                first_line, rest = desc.split("\n", 1)
                desc = first_line + "\n" + inspect.cleandoc(rest)
            desc = desc.strip("\n")
        return before, desc

    def _build_multi_meta(self, section: Section, text: str) -> DocstringMeta:
        """Build docstring element for multiline section.

        Parameters
        ----------
        section : Section
            The section that is being processed.
        text : str
            Text of a single entry of the section.

        Returns
        -------
        DocstringMeta
            docstring meta element

        Raises
        ------
        ParseError
            If the text lacks a colon ':'
        """
        before, desc = self._prepare_multi_meta(section, text)

        if section.key in PARAM_KEYWORDS:
            match = GOOGLE_TYPED_ARG_REGEX.match(before)
            if match:
                arg_name, type_name = match.group(1, 2)
                optional_suffix = ", optional"
                if type_name.endswith(optional_suffix):
                    is_optional = True
                    type_name = type_name[: -len(optional_suffix)]
                elif type_name.endswith("?"):
                    is_optional = True
                    type_name = type_name[:-1]
                else:
                    is_optional = False
            else:
                # No "(type)" part: optionality is unknown rather than False.
                arg_name, type_name = before, None
                is_optional = None

            match = GOOGLE_ARG_DESC_REGEX.match(desc)
            default = match.group(1) if match else None

            return DocstringParam(
                args=[section.key, before],
                description=desc,
                arg_name=arg_name,
                type_name=type_name,
                is_optional=is_optional,
                default=default,
            )
        if section.key in RETURNS_KEYWORDS | YIELDS_KEYWORDS:
            match = GOOGLE_TYPED_ARG_REGEX.match(before)
            if match:
                arg_name, type_name = match.group(1, 2)
            else:
                # Without parentheses the text before the colon is the type.
                arg_name, type_name = None, before
            if section.key in RETURNS_KEYWORDS:
                return DocstringReturns(
                    args=[section.key, arg_name or type_name],
                    description=desc,
                    return_name=arg_name,
                    type_name=type_name,
                    is_generator=False,
                )
            return DocstringYields(
                args=[section.key, arg_name or type_name],
                description=desc,
                yield_name=arg_name,
                type_name=type_name,
                is_generator=True,
            )
        if section.key in RAISES_KEYWORDS:
            return DocstringRaises(
                args=[section.key, before], description=desc, type_name=before
            )
        return DocstringMeta(args=[section.key, before], description=desc)

    def add_section(self, section: Section) -> None:
        """Add or replace a section.

        Parameters
        ----------
        section : Section
            The new section.
        """
        self.sections[section.title] = section
        # The title regex depends on the set of titles, so rebuild it.
        self._setup()

    def _split_sections(self, meta_chunk: str) -> Mapping[str, str]:
        """Split the chunk into sections as determined by the titles.

        Parameters
        ----------
        meta_chunk : str
            Part of the docstring NOT holding the description.

        Returns
        -------
        Mapping[str, str]
            Mapping between section title and part of the docstring that deals
            with it. If a title occurs more than once, the last occurrence wins.
        """
        chunks: "OrderedDict[str, str]" = OrderedDict()
        matches = list(self.titles_re.finditer(meta_chunk))
        if not matches:
            return chunks
        # A section body runs from the end of its title to the start of the
        # next title, or to the end of the text for the last section.
        body_ends = [match.start() for match in matches[1:]]
        body_ends.append(len(meta_chunk))
        for match, body_end in zip(matches, body_ends):
            title = match.group(1)
            if title not in self.sections:
                continue

            # Clear Any Unknown Meta
            # Ref: https://github.com/rr-/docstring_parser/issues/29
            meta_details = meta_chunk[match.end() : body_end]
            # An unindented line ends the section; drop it and what follows.
            unknown_meta = re.search(r"\n\S", meta_details)
            if unknown_meta is not None:
                meta_details = meta_details[: unknown_meta.start()]

            chunks[title] = meta_details.strip("\n")
        return chunks

    def _determine_indent(self, chunk: str) -> str:
        """Determine indent.

        Parameters
        ----------
        chunk : str
            Chunk to determine the indent for.

        Returns
        -------
        str
            The leading whitespace of the chunk (possibly empty).

        Raises
        ------
        ParseError
            If no indent could be determined.
        """
        indent_match = re.search(r"^\s*", chunk)
        # Defensive: the pattern can match the empty string, so a miss is not
        # expected; the check also narrows the Optional for type checkers.
        if not indent_match:
            msg = f"Can't infer indent from '{chunk}'"
            raise ParseError(msg)
        return indent_match.group()

    def _get_chunks(self, text: str) -> tuple[str, str]:
        """Split docstring into description and meta part.

        Parameters
        ----------
        text : str
            Docstring text to split.

        Returns
        -------
        tuple[str, str]
            The text before the first section title and the text from that
            title onwards. The second element is empty if no title was found.
        """
        if match := self.titles_re.search(text):
            return text[: match.start()], text[match.start() :]
        return text, ""

    def _get_multi_chunk_splits(
        self, chunk: str, title: str, indent: str
    ) -> list[tuple[int, int]]:
        """Get the starting and ending position for each element of a multi chunk.

        Parameters
        ----------
        chunk : str
            Full chunk to split.
        title : str
            Title of the section represented by the chunk.
        indent : str
            Indent before each element of the chunk.

        Returns
        -------
        list[tuple[int, int]]
            List of all start and end positions of each element of the chunk.

        Raises
        ------
        ParseError
            If no entry could be found with the expected indent.
        """
        # Split based on lines which have exactly that indent
        c_matches = list(re.finditer(rf"^{indent}(?=\S)", chunk, flags=re.M))
        if not c_matches:
            msg = f'No specification for "{title}": "{chunk}"'
            raise ParseError(msg)
        # Each entry starts after its indent and ends where the next begins.
        c_splits = [
            (c_cur.end(), c_next.start())
            for c_cur, c_next in zip(c_matches, c_matches[1:])
        ]
        c_splits.append((c_matches[-1].end(), len(chunk)))
        return c_splits

    def parse(self, text: Optional[str]) -> Docstring:
        """Parse the Google-style docstring into its components.

        Parameters
        ----------
        text : Optional[str]
            docstring text

        Returns
        -------
        Docstring
            parsed docstring

        Raises
        ------
        ParseError
            If no specification could be found for a title, chunk pair.
        """
        ret = Docstring(style=DocstringStyle.GOOGLE)
        if not text:
            return ret

        # Clean according to PEP-0257
        text = inspect.cleandoc(text)

        desc_chunk, meta_chunk = self._get_chunks(text)

        # Break description into short and long parts
        split_description(ret, desc_chunk)

        # Split by sections determined by titles
        chunks = self._split_sections(meta_chunk)

        if not chunks:
            return ret

        # Add elements from each chunk
        for title, chunk in chunks.items():
            # Determine indent
            indent = self._determine_indent(chunk)
            section = self.sections[title]
            # Check for singular elements
            if section.type_info == SectionType.SINGULAR:
                part = inspect.cleandoc(chunk)
                ret.meta.append(self._build_single_meta(section, part))
                continue

            # Split based on lines which have exactly that indent
            c_splits = self._get_multi_chunk_splits(chunk, title, indent)
            if section.type_info == SectionType.MULTIPLE:
                for start, end in c_splits:
                    part = chunk[start:end].strip("\n")
                    ret.meta.append(self._build_multi_meta(section, part))
            else:  # SectionType.SINGULAR_OR_MULTIPLE
                # Try to handle it as a multiple section with multiple entries
                try:
                    metas = [
                        self._build_multi_meta(section, chunk[start:end].strip("\n"))
                        for start, end in c_splits
                    ]
                # Fall back to a singular entry for multi or single section
                except ParseError:
                    part = inspect.cleandoc(chunk)
                    if MULTIPLE_PATTERN.match(part):
                        ret.meta.append(self._build_multi_meta(section, part))
                    else:
                        ret.meta.append(self._build_single_meta(section, part))
                else:
                    ret.meta.extend(metas)
        return ret
def parse(text: Optional[str]) -> Docstring:
    """Parse a Google-style docstring using a parser with default settings.

    Convenience wrapper that creates a fresh ``GoogleParser`` without
    arguments and delegates to its ``parse`` method.

    Parameters
    ----------
    text : Optional[str]
        docstring text

    Returns
    -------
    Docstring
        parsed docstring
    """
    default_parser = GoogleParser()
    return default_parser.parse(text)
def compose(  # noqa: PLR0915
    docstring: Docstring,
    rendering_style: RenderingStyle = RenderingStyle.COMPACT,
    # NOTE(review): the capture this was recovered from had runs of whitespace
    # collapsed; confirm the intended width of the default indent.
    indent: str = " ",
) -> str:
    """Render a parsed docstring into docstring text.

    Sections are emitted in the order: description, ``Args``, ``Attributes``,
    ``Returns``, ``Yields``, ``Raises``, followed by any remaining metadata.

    Parameters
    ----------
    docstring : Docstring
        parsed docstring representation
    rendering_style : RenderingStyle
        the style to render docstrings (Default value = RenderingStyle.COMPACT)
    indent : str
        the characters used as indentation in the docstring string

    Returns
    -------
    str
        docstring text
    """
    # Output lines/blocks, joined with newlines at the very end. The nested
    # helpers below append to this list.
    parts: list[str] = []

    def process_one(one: MainSections) -> None:
        """Build the output text for one entry in a section.

        Parameters
        ----------
        one : MainSections
            Docstring for which to build the raw text.
        """
        head = ""

        if isinstance(one, DocstringParam):
            head += one.arg_name or ""
        elif isinstance(one, DocstringReturns):
            head += one.return_name or ""
        elif isinstance(one, DocstringYields):
            head += one.yield_name or ""

        # Only parameters carry an optional marker; its spelling depends on
        # the rendering style.
        if isinstance(one, DocstringParam) and one.is_optional:
            optional = (
                "?" if rendering_style == RenderingStyle.COMPACT else ", optional"
            )
        else:
            optional = ""

        if one.type_name and head:
            head += f" ({one.type_name}{optional}):"
        elif one.type_name:
            head += f"{one.type_name}{optional}:"
        else:
            head += ":"
        head = indent + head

        if one.description and rendering_style == RenderingStyle.EXPANDED:
            # Expanded: the description starts on the line below the head.
            body = f"\n{indent}{indent}".join([head, *one.description.splitlines()])
            parts.append(body)
        elif one.description:
            # Otherwise the first description line shares the head's line.
            (first, *rest) = one.description.splitlines()
            body = f"\n{indent}{indent}".join([f"{head} {first}", *rest])
            parts.append(body)
        else:
            parts.append(head)

    def process_sect(name: str, args: Sequence[MainSections]) -> None:
        """Build the output for a docstring section.

        Nothing is emitted when ``args`` is empty.

        Parameters
        ----------
        name : str
            Section for which to build the output.
        args : Sequence[MainSections]
            List of individual elements of that section.
        """
        if args:
            parts.append(name)
            for arg in args:
                process_one(arg)
            # Blank line separating this section from the next one.
            parts.append("")

    append_description(docstring, parts)

    process_sect("Args:", [p for p in docstring.params or [] if p.args[0] == "param"])

    process_sect(
        "Attributes:",
        [p for p in docstring.params or [] if p.args[0] == "attribute"],
    )

    process_sect(
        "Returns:",
        docstring.many_returns,
    )

    process_sect("Yields:", docstring.many_yields)

    process_sect("Raises:", docstring.raises or [])

    if docstring.returns and not docstring.many_returns:
        ret = docstring.returns
        # Pick the heading from the entry itself. The previous condition
        # tested ``ret`` (always truthy here) and therefore always produced
        # "Yields:"; it also added a NumPy-style dashed underline, which does
        # not belong in a Google-style docstring.
        process_sect("Yields:" if ret.is_generator else "Returns:", [ret])

    for meta in docstring.meta:
        if isinstance(
            meta, (DocstringParam, DocstringReturns, DocstringRaises, DocstringYields)
        ):
            continue  # Already handled
        parts.append(meta.args[0].replace("_", "").title() + ":")
        if meta.description:
            lines = [indent + line for line in meta.description.splitlines()]
            parts.append("\n".join(lines))
        parts.append("")

    # Drop trailing blank entries so the result does not end in empty lines.
    while parts and not parts[-1]:
        parts.pop()

    return "\n".join(parts)