Coverage for pymend\docstring_parser\numpydoc.py: 93%
237 statements
« prev ^ index » next coverage.py v7.3.2, created at 2024-04-20 19:09 +0200
« prev ^ index » next coverage.py v7.3.2, created at 2024-04-20 19:09 +0200
1"""Numpydoc-style docstring parsing.
3See
4---
5https://numpydoc.readthedocs.io/en/latest/format.html
6"""
8import inspect
9import itertools
10import re
11from collections.abc import Iterable, Iterator
12from textwrap import dedent
13from typing import Optional, TypeVar
15from typing_extensions import override
17from .common import (
18 Docstring,
19 DocstringDeprecated,
20 DocstringExample,
21 DocstringMeta,
22 DocstringParam,
23 DocstringRaises,
24 DocstringReturns,
25 DocstringStyle,
26 DocstringYields,
27 MainSections,
28 ParseError,
29 RenderingStyle,
30 clean_str,
31 split_description,
32)
_T = TypeVar("_T")


def _pairwise(
    iterable: Iterable[_T], end: Optional[_T] = None
) -> Iterator[tuple[_T, Optional[_T]]]:
    """Pair every element with its successor, padding the final pair.

    Parameters
    ----------
    iterable : Iterable[_T]
        Source of the elements to pair up.
    end : Optional[_T]
        Filler used as the successor of the last element
        (Default value = None)

    Returns
    -------
    Iterator[tuple[_T, Optional[_T]]]
        Lazy iterator over ``(current, following)`` tuples. It yields one
        tuple per input element, so an empty input yields nothing.
    """
    current, following = itertools.tee(iterable, 2)
    # Advance the second copy by one so it always runs one element ahead.
    next(following, None)
    padded_following = itertools.chain(following, (end,))
    return zip(current, padded_following)
# Every line that starts in column zero opens a new key/value item.
KV_REGEX = re.compile(r"^[^\s].*$", flags=re.MULTILINE)

# Splits a parameter key line into the name and the optional type that
# follows a whitespace-preceded colon.
PARAM_KEY_REGEX = re.compile(r"^(?P<name>.*?)(?:\s+:\s*(?P<type>.*?))?$")

# Recognises a trailing ", optional" / "(optional)" marker on a type string
# and captures the type without it.
PARAM_OPTIONAL_REGEX = re.compile(r"(?P<type>.*?)(?:, optional|\(optional\))$")

# numpydoc format has no formal grammar for default values,
# so this is an educated guess based on common phrasings.
# NOTE(review): for text such as "Default value = None" the empty alternative
# matches and the captured value is the word "value" - confirm this is intended.
PARAM_DEFAULT_REGEX = re.compile(
    r"(?<!\S)[Dd]efault(?: is | = |: |s to |)\s*(?P<value>[\w\-\.]*\w)"
)

# Splits a returns/yields key line into an optional name and the type.
RETURN_KEY_REGEX = re.compile(r"^(?:(?P<name>.*?)\s*:\s*)?(?P<type>.*?)$")
class Section:
    """Numpydoc section parser.

    A plain section keeps its whole body as the description of a single
    ``DocstringMeta`` entry.
    """

    def __init__(self, title: str, key: str) -> None:
        """Initialize a section.

        Parameters
        ----------
        title : str
            section title. For most sections, this is a heading like
            "Parameters" which appears on its own line, underlined by
            hyphens ('-') on the following line.
        key : str
            meta key string. In the parsed ``DocstringMeta`` instance this
            will be the first element of the ``args`` attribute list.
        """
        self.title = title
        self.key = key

    @property
    def title_pattern(self) -> str:
        """Regular expression pattern matching this section's header.

        The pattern captures the literal ``title`` in an anonymous group and
        requires an underline of exactly ``len(title)`` hyphens on the next
        line.

        Returns
        -------
        str
            Regex pattern as a string.
        """
        dashes = "-" * len(self.title)
        # Escape the title so that regex metacharacters in it (e.g. "+", "(")
        # are matched literally and the captured text equals ``self.title``.
        return rf"^({re.escape(self.title)})\s*?\n{dashes}\s*$"

    def parse(self, text: str) -> Iterable[DocstringMeta]:
        """Parse ``DocstringMeta`` objects from the body of this section.

        Parameters
        ----------
        text : str
            section body text. Should be cleaned with
            ``inspect.cleandoc`` before parsing.

        Yields
        ------
        DocstringMeta
            single object whose description is the cleaned section body.
        """
        yield DocstringMeta([self.key], description=clean_str(text))
class _KVSection(Section):
    """Base parser for numpydoc sections with key-value syntax.

    Keys sit in the first column; everything up to the next key is the
    value belonging to the preceding key, e.g.:

        key
            value
        key2 : type
            values can also span...
            ... multiple lines
    """

    def _parse_item(self, key: str, value: str) -> DocstringMeta:
        """Turn one key/value pair into a meta object (abstract hook).

        Parameters
        ----------
        key : str
            Key line of the item.
        value : str
            Cleaned text belonging to that key.

        Raises
        ------
        NotImplementedError
            Always; concrete subclasses have to provide the implementation.
        """
        raise NotImplementedError

    @override
    def parse(self, text: str) -> Iterable[DocstringMeta]:
        """Split the section body into items and parse each of them.

        Parameters
        ----------
        text : str
            Section body to parse.

        Yields
        ------
        DocstringMeta
            One parsed item per key line found in the text.
        """
        key_matches = KV_REGEX.finditer(text)
        for current, following in _pairwise(key_matches):
            # The value spans from the end of this key line up to the next
            # key line, or to the end of the text for the last item.
            value_start = current.end()
            value_stop = None if following is None else following.start()
            raw_value = text[value_start:value_stop]
            yield self._parse_item(
                key=current.group(), value=inspect.cleandoc(raw_value)
            )
class _SphinxSection(Section):
    """Base parser for numpydoc sections with sphinx-style syntax.

    E.g. sections that look like this:

        .. title:: something
            possibly over multiple lines
    """

    @property
    @override
    def title_pattern(self) -> str:
        """Title pattern used by sphinx sections.

        Matches ``.. <title>::`` at the start of a line and captures the
        literal title in an anonymous group.

        Returns
        -------
        str
            Regex pattern as a string.
        """
        # Escape the title so regex metacharacters in it are matched literally.
        return rf"^\.\.\s*({re.escape(self.title)})\s*::"
class ParamSection(_KVSection):
    """Parser for numpydoc parameter sections.

    Handles any section whose items look like this:

        arg_name
            arg_description
        arg_2 : type, optional
            descriptions can also span...
            ... multiple lines
    """

    @override
    def _parse_item(self, key: str, value: str) -> DocstringParam:
        """Build a ``DocstringParam`` from one key line and its description.

        Parameters
        ----------
        key : str
            Key line holding the parameter name and, optionally, its type.
        value : str
            Description text; may also mention a default value.

        Returns
        -------
        DocstringParam
            Parsed representation of the parameter item.

        Raises
        ------
        ParseError
            If the key line does not match the expected pattern.
        ParseError
            If the captured parameter name is not a string.
        """
        key_match = PARAM_KEY_REGEX.match(key)
        if key_match is None:
            msg = f"Could not parse param key on line `{key}`"
            raise ParseError(msg)

        arg_name = key_match.group("name")
        if not isinstance(arg_name, str):
            msg = (
                f"Did not get a string when capturing mandatory section"
                f" 'arg_name' for key line `{key}`. Got `{arg_name}` instead."
            )
            raise ParseError(msg)

        # ``is_optional`` stays ``None`` when the key carries no type at all.
        type_name = key_match.group("type")
        is_optional = None
        if isinstance(type_name, str):
            optional_match = PARAM_OPTIONAL_REGEX.match(type_name)
            is_optional = optional_match is not None
            if optional_match is not None:
                # Keep only the type itself, without the optional marker.
                type_name = optional_match.group("type")
        else:
            type_name = None

        default_match = PARAM_DEFAULT_REGEX.search(value) if value != "" else None
        default = None if default_match is None else default_match.group("value")

        return DocstringParam(
            args=[self.key, arg_name],
            description=clean_str(value),
            arg_name=arg_name,
            type_name=type_name,
            is_optional=is_optional,
            default=default,
        )
class RaisesSection(_KVSection):
    """Parser for numpydoc raises sections.

    Handles any section whose items look like this:

        ValueError
            A description of what might raise ValueError
    """

    @override
    def _parse_item(self, key: str, value: str) -> DocstringRaises:
        """Build a ``DocstringRaises`` from one key line and its description.

        Parameters
        ----------
        key : str
            Key line of the item, usually the name of the raised exception.
        value : str
            Description of the item.

        Returns
        -------
        DocstringRaises
            Parsed representation of the raises item.
        """
        # An empty key carries no exception type.
        exception_type = None if key == "" else key
        return DocstringRaises(
            args=[self.key, key],
            description=clean_str(value),
            type_name=exception_type,
        )
class ReturnsSection(_KVSection):
    """Parser for numpydoc returns sections.

    Handles any section whose items look like this:

        return_name : type
            A description of this returned value
        another_type
            Return names are optional, types are required
    """

    is_generator = False

    @override
    def _parse_item(self, key: str, value: str) -> DocstringReturns:
        """Build a ``DocstringReturns`` from one key line and its description.

        Parameters
        ----------
        key : str
            Key line of the item (usually type, possibly name + type)
        value : str
            Description of the return value.

        Returns
        -------
        DocstringReturns
            Parsed representation of the return item.
        """
        key_match = RETURN_KEY_REGEX.match(key)
        # Without a match neither a name nor a type is available.
        return_name = None if key_match is None else key_match.group("name")
        type_name = None if key_match is None else key_match.group("type")

        return DocstringReturns(
            args=[self.key],
            description=clean_str(value),
            type_name=type_name,
            is_generator=self.is_generator,
            return_name=return_name,
        )
class YieldsSection(_KVSection):
    """Parser for numpydoc generator "yields" sections."""

    is_generator = True

    @override
    def _parse_item(self, key: str, value: str) -> DocstringYields:
        """Build a ``DocstringYields`` from one key line and its description.

        Parameters
        ----------
        key : str
            Key line of the item (usually type, possibly name + type)
        value : str
            Description of the yielded value.

        Returns
        -------
        DocstringYields
            Parsed representation of the yield item.
        """
        key_match = RETURN_KEY_REGEX.match(key)
        # Without a match neither a name nor a type is available.
        yield_name = None if key_match is None else key_match.group("name")
        type_name = None if key_match is None else key_match.group("type")

        return DocstringYields(
            args=[self.key],
            description=clean_str(value),
            type_name=type_name,
            is_generator=self.is_generator,
            yield_name=yield_name,
        )
class DeprecationSection(_SphinxSection):
    """Parser for numpydoc "deprecation warning" sections.

    Handles any section that looks like this:

        .. deprecated:: 1.6.0
            This description has
            multiple lines!
    """

    @override
    def parse(self, text: str) -> Iterable[DocstringDeprecated]:
        """Parse ``DocstringDeprecated`` objects from the body of this section.

        Parameters
        ----------
        text : str
            Text of the deprecation section. The first line is taken as the
            version, everything after it as the description.

        Yields
        ------
        DocstringDeprecated
            Parsed representation of the deprecation item.

        Raises
        ------
        ParseError
            If the parsed version number was unexpectedly `None`.
            Usually a lack of version number would be represented by an
            empty string.
        """
        # At most two pieces: the version line and the remaining description.
        pieces = text.split(sep="\n", maxsplit=1)
        version = pieces[0] if pieces else None
        desc = pieces[1] if len(pieces) > 1 else None

        if version is None:
            msg = (
                f"Got `None` while parsing version number "
                f"in deprecated section `{text}`."
            )
            raise ParseError(msg)

        if desc is not None:
            desc = clean_str(inspect.cleandoc(desc))

        yield DocstringDeprecated(
            args=[self.key], description=desc, version=clean_str(version)
        )
class ExamplesSection(Section):
    """Parser for numpydoc examples sections.

    Each example is an optional description followed by a doctest-style
    snippet. Blank lines separate descriptions, snippets and examples, e.g.:

        Optional description for the following example.

        >>> import numpy.matlib
        >>> np.matlib.empty((2, 2))  # filled with random data
        matrix([[  6.76425276e-320,   9.79033856e-307],  # random
                [  7.39337286e-309,   3.22135945e-309]])

        Description for the second example.

        >>> d = np.zeros((5,2))
        >>> for i in range(5):
        ...     for j in range(2):
        ...         d[i,j] += a[i] * b[j]
        >>> c == d
        array([[ True,  True],
               [ True,  True]])
    """

    @override
    def parse(self, text: str) -> Iterable[DocstringExample]:
        """Parse ``DocstringExample`` objects from the body of this section.

        Parameters
        ----------
        text : str
            section body text. Should be cleaned with
            ``inspect.cleandoc`` before parsing.

        Yields
        ------
        DocstringExample
            One object per example found in the section body.
        """
        # The lines are kept in reverse order so that the next line to
        # consume is always the cheap-to-pop last element.
        remaining = dedent(text).strip().splitlines()
        remaining.reverse()
        # A blank pseudo-line in front means the first description needs no
        # special casing.
        remaining.append("\n")

        def is_blank(line: str) -> bool:
            """Tell whether a line holds nothing but whitespace."""
            return line.strip() == ""

        def drop_blank_lines() -> None:
            """Discard blank lines at the current reading position."""
            while remaining and is_blank(remaining[-1]):
                remaining.pop()

        while remaining:
            drop_blank_lines()

            # The description runs until a blank line or the first ">>>"
            # line; starting directly with ">>>" means no description.
            description_lines: list[str] = []
            while (
                remaining
                and not is_blank(remaining[-1])
                and not remaining[-1].startswith(">>>")
            ):
                description_lines.append(remaining.pop())

            drop_blank_lines()

            # The snippet is everything up to the next blank line: the code
            # starts with ">>>" or "..." but its output can be any text.
            snippet_lines: list[str] = []
            while remaining and not is_blank(remaining[-1]):
                snippet_lines.append(remaining.pop())

            yield DocstringExample(
                [self.key],
                snippet="\n".join(snippet_lines) if snippet_lines else None,
                description="\n".join(description_lines),
            )
# Sections recognised by default. Several spellings of a title map to the
# same meta key.
DEFAULT_SECTIONS = [
    # Parameter-like sections.
    ParamSection("Parameters", "param"),
    ParamSection("Params", "param"),
    ParamSection("Arguments", "param"),
    ParamSection("Args", "param"),
    ParamSection("Other Parameters", "other_param"),
    ParamSection("Other Params", "other_param"),
    ParamSection("Other Arguments", "other_param"),
    ParamSection("Other Args", "other_param"),
    ParamSection("Receives", "receives"),
    ParamSection("Receive", "receives"),
    # Exceptions and warnings.
    RaisesSection("Raises", "raises"),
    RaisesSection("Raise", "raises"),
    RaisesSection("Warns", "warns"),
    RaisesSection("Warn", "warns"),
    # Class members.
    ParamSection("Attributes", "attribute"),
    ParamSection("Attribute", "attribute"),
    ParamSection("Methods", "method"),
    ParamSection("Method", "method"),
    # Produced values.
    ReturnsSection("Returns", "returns"),
    ReturnsSection("Return", "returns"),
    YieldsSection("Yields", "yields"),
    YieldsSection("Yield", "yields"),
    # Examples.
    ExamplesSection("Examples", "examples"),
    ExamplesSection("Example", "examples"),
    # Free-text sections.
    Section("Warnings", "warnings"),
    Section("Warning", "warnings"),
    Section("See Also", "see_also"),
    Section("Related", "see_also"),
    Section("Notes", "notes"),
    Section("Note", "notes"),
    Section("References", "references"),
    Section("Reference", "references"),
    # Sphinx-style directive.
    DeprecationSection("deprecated", "deprecation"),
]
class NumpydocParser:
    """Parser for numpydoc-style docstrings."""

    def __init__(self, sections: Optional[Iterable[Section]] = None) -> None:
        """Register the sections this parser should recognize.

        Parameters
        ----------
        sections : Optional[Iterable[Section]]
            Sections to recognize. ``DEFAULT_SECTIONS`` is used when this is
            ``None`` or otherwise falsy.
        """
        active_sections = sections if sections else DEFAULT_SECTIONS
        self.sections = {section.title: section for section in active_sections}
        self._setup()

    def _setup(self) -> None:
        """Compile one regex that matches the header of any known section."""
        header_patterns = [
            section.title_pattern for section in self.sections.values()
        ]
        self.titles_re = re.compile(r"|".join(header_patterns), flags=re.M)

    def add_section(self, section: Section) -> None:
        """Add or replace a section.

        Parameters
        ----------
        section : Section
            The new section. It replaces any registered section that has the
            same title.
        """
        self.sections[section.title] = section
        # The combined header regex has to be rebuilt to include the section.
        self._setup()

    def parse(self, text: Optional[str]) -> Docstring:
        """Parse the numpy-style docstring into its components.

        Parameters
        ----------
        text : Optional[str]
            docstring text

        Returns
        -------
        Docstring
            parsed docstring; an empty one if ``text`` is ``None`` or empty.
        """
        ret = Docstring(style=DocstringStyle.NUMPYDOC)
        if not text:
            return ret

        # Clean according to PEP-0257
        text = inspect.cleandoc(text)

        # Everything before the first section header is the description.
        first_header = self.titles_re.search(text)
        if first_header is None:
            desc_chunk, meta_chunk = text, ""
        else:
            split_at = first_header.start()
            desc_chunk, meta_chunk = text[:split_at], text[split_at:]

        # Break description into short and long parts
        split_description(ret, desc_chunk)

        headers = self.titles_re.finditer(meta_chunk)
        for header, next_header in _pairwise(headers):
            # Only the group of the alternative that matched is set.
            title = next(group for group in header.groups() if group is not None)
            section = self.sections[title]

            # The section body starts after its header and ends where the
            # next header starts (or at the end of the text).
            body_start = header.end()
            body_stop = None if next_header is None else next_header.start()
            ret.meta.extend(section.parse(meta_chunk[body_start:body_stop]))

        return ret
def parse(text: Optional[str]) -> Docstring:
    """Parse a numpy-style docstring using a parser with default sections.

    Parameters
    ----------
    text : Optional[str]
        docstring text

    Returns
    -------
    Docstring
        parsed docstring
    """
    default_parser = NumpydocParser()
    return default_parser.parse(text)
def process_examples(examples: list[DocstringExample], parts: list[str]) -> None:
    """Add string representation of examples section to parts.

    Nothing is added when ``examples`` is empty. Otherwise the section
    header, the examples separated by blank lines, and one trailing blank
    line are appended to ``parts`` in place.

    Parameters
    ----------
    examples : list[DocstringExample]
        DocstringExamples to add to parts.
    parts : list[str]
        List of strings representing the final output of compose().
        It is modified in place.
    """
    if not examples:
        return

    parts.append("Examples")
    parts.append("-" * len(parts[-1]))
    for i, example in enumerate(examples):
        # Leave out newline for first example
        if i != 0:
            parts.append("")
        if example.description:
            parts.append(example.description)
            # Only add a new line if we have an actual example snippet here.
            # If not the next description will handle it.
            if example.snippet:
                parts.append("")
        if example.snippet:
            parts.append(example.snippet)
    # Blank line that separates the examples from whatever follows.
    parts.append("")
def compose(  # noqa: PLR0915, PLR0912
    # pylint: disable=W0613,R0915,R0912
    docstring: Docstring,
    rendering_style: RenderingStyle = RenderingStyle.COMPACT,  # noqa: ARG001
    # NOTE(review): the text this block was taken from has collapsed
    # whitespace - confirm the intended width of the default indent.
    indent: str = " ",
) -> str:
    """Render a parsed docstring into docstring text.

    Parameters
    ----------
    docstring : Docstring
        parsed docstring representation
    rendering_style : RenderingStyle
        the style to render docstrings; currently unused by this function
        (Default value = RenderingStyle.COMPACT)
    indent : str
        the characters used as indentation in the docstring string

    Returns
    -------
    str
        docstring text
    """

    def process_one(one: MainSections) -> None:
        """Build the output text for one entry in a section.

        Parameters
        ----------
        one : MainSections
            Docstring for which to build the raw text.
        """
        if isinstance(one, DocstringParam):
            head = one.arg_name
        elif isinstance(one, DocstringReturns):
            head = one.return_name
        elif isinstance(one, DocstringYields):
            head = one.yield_name
        else:
            head = None

        # The head line is "name : type", just the type, just the name,
        # or empty, depending on what is available.
        if one.type_name and head:
            head += f" : {one.type_name}"
        elif one.type_name:
            head = one.type_name
        elif not head:
            head = ""

        if isinstance(one, DocstringParam) and one.is_optional:
            head += ", optional"

        if one.description:
            body = f"\n{indent}".join([head, *one.description.splitlines()])
            parts.append(body)
        else:
            parts.append(head)

    def process_sect(name: str, args: list[MainSections]) -> None:
        """Build the output for a docstring section.

        Parameters
        ----------
        name : str
            Section for which to build the output.
        args : list[MainSections]
            List of individual elements of that section.
        """
        if args:
            parts.append(name)
            parts.append("-" * len(name))
            for arg in args:
                process_one(arg)
            parts.append("")

    parts: list[str] = []
    if docstring.short_description:
        parts.append(docstring.short_description)
    if docstring.blank_after_short_description:
        parts.append("")

    if docstring.deprecation:
        first = ".. deprecated::"
        if docstring.deprecation.version:
            first += f" {docstring.deprecation.version}"
        if docstring.deprecation.description:
            rest = docstring.deprecation.description.splitlines()
        else:
            rest = []
        sep = f"\n{indent}"
        parts.append(sep.join([first, *rest]))

    if docstring.long_description:
        parts.append(docstring.long_description)
    if docstring.blank_after_long_description:
        parts.append("")

    process_sect(
        "Parameters",
        [item for item in docstring.params or [] if item.args[0] == "param"],
    )

    process_sect(
        "Attributes",
        [item for item in docstring.params or [] if item.args[0] == "attribute"],
    )

    process_sect(
        "Methods",
        [item for item in docstring.params or [] if item.args[0] == "method"],
    )

    process_sect(
        "Returns",
        list(docstring.many_returns or []),
    )

    process_sect(
        "Yields",
        list(docstring.many_yields or []),
    )

    # Fallback for a single return entry that is not part of many_returns.
    if docstring.returns and not docstring.many_returns:
        ret = docstring.returns
        # Pick the header from the entry's generator flag; testing ``ret``
        # itself is always true inside this branch.
        parts.append("Yields" if ret.is_generator else "Returns")
        parts.append("-" * len(parts[-1]))
        process_one(ret)
        # Blank separator line, as after every other section.
        parts.append("")

    process_sect(
        "Receives",
        [item for item in docstring.params or [] if item.args[0] == "receives"],
    )

    process_sect(
        "Other Parameters",
        [item for item in docstring.params or [] if item.args[0] == "other_param"],
    )

    process_sect(
        "Raises",
        [item for item in docstring.raises or [] if item.args[0] == "raises"],
    )

    process_sect(
        "Warns",
        [item for item in docstring.raises or [] if item.args[0] == "warns"],
    )

    process_examples(docstring.examples, parts)

    for meta in docstring.meta:
        if isinstance(
            meta,
            (
                DocstringDeprecated,
                DocstringParam,
                DocstringReturns,
                DocstringRaises,
                DocstringYields,
                DocstringExample,
            ),
        ):
            continue  # Already handled

        # Turn the meta key into a heading ("see_also" -> "See Also") and
        # underline it with exactly as many hyphens as the heading is long.
        heading = meta.args[0].replace("_", " ").title()
        parts.append(heading)
        parts.append("-" * len(heading))

        if meta.description:
            parts.append(meta.description)
        parts.append("")

    # Strip trailing blank lines.
    while parts and not parts[-1]:
        parts.pop()

    return "\n".join(parts)