Coverage for pymend\docstring_parser\google.py: 96%

216 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2024-04-20 19:09 +0200

1"""Google-style docstring parsing.""" 

2 

3import inspect 

4import re 

5from collections import OrderedDict 

6from collections.abc import Mapping, Sequence 

7from enum import IntEnum 

8from typing import NamedTuple, Optional 

9 

10from .common import ( 

11 EXAMPLES_KEYWORDS, 

12 PARAM_KEYWORDS, 

13 RAISES_KEYWORDS, 

14 RETURNS_KEYWORDS, 

15 YIELDS_KEYWORDS, 

16 Docstring, 

17 DocstringExample, 

18 DocstringMeta, 

19 DocstringParam, 

20 DocstringRaises, 

21 DocstringReturns, 

22 DocstringStyle, 

23 DocstringYields, 

24 MainSections, 

25 ParseError, 

26 RenderingStyle, 

27 append_description, 

28 split_description, 

29) 

30 

31 

class SectionType(IntEnum):
    """Enumerate how many entries a docstring section may hold."""

    # Exactly one free-form body, e.g. an examples section.
    SINGULAR = 0

    # A list of individually named entries, e.g. a parameters section.
    MULTIPLE = 1

    # Either a single body or a list of entries, e.g. returns or yields.
    SINGULAR_OR_MULTIPLE = 2

43 

44 

class Section(NamedTuple):
    """Describe one recognized docstring section.

    The tuple pairs the title as written in the docstring with the key
    used for the parsed meta entries and the kind of content expected.
    """

    # Heading text as it appears in the docstring, e.g. "Args".
    title: str
    # Key stored as the first element of the parsed meta ``args``.
    key: str
    # Whether the section holds one body, many entries, or either.
    type_info: SectionType

51 

52 

# Splits a "name (type)" spec: group 1 is the text before the parentheses,
# group 2 is the content inside them (surrounding whitespace excluded).
GOOGLE_TYPED_ARG_REGEX = re.compile(r"\s*(.+?)\s*\(\s*(.*[^\s]+)\s*\)")
# Captures the value in a ". Defaults to <value>." phrase of a description.
GOOGLE_ARG_DESC_REGEX = re.compile(r".*\. Defaults to (.+)\.")
# Recognizes the start of an entry ("<spec>:") inside a multi-entry section.
MULTIPLE_PATTERN = re.compile(
    # Match anything that has leading whitespace and then contiguous non-whitespace
    # (non colon) character followed by a colon.
    # somecontiguoustype: some description
    r"(\s*[^:\s]+:)"
    # Match anything that has some contiguous text, then something in parens,
    # immediately followed by a colon.
    r"|(\s*[^:\s]+\s+\(.+\):)"
    # Allow whitespace if we have a closing ] before the colon, optionally with a )
    # some var name (list[int, int]): some description
    r"|([^:]*\]:.*)"
    # Allow for arbitrary chaining of pipe character for type annotations int | str
    # Where the individual types are allowed to have spaces as long as they start
    # and end without one ([^\s|][^\|]*[^\s|])
    r"|(\s*[^\s|][^\|]*[^\s|](\s*\|\s*[^\s|][^\|]*[^\s|])+:)"
)

71 

# Sections recognized when the parser is created without an explicit list.
# Titles are grouped by the meta key they map to; the resulting list keeps
# the order in which the titles are written below.
DEFAULT_SECTIONS = [
    Section(title, key, kind)
    for key, kind, titles in (
        ("param", SectionType.MULTIPLE, ("Arguments", "Args", "Parameters", "Params")),
        ("raises", SectionType.MULTIPLE, ("Raises", "Exceptions", "Except")),
        ("attribute", SectionType.MULTIPLE, ("Attributes",)),
        ("examples", SectionType.SINGULAR, ("Example", "Examples")),
        ("returns", SectionType.SINGULAR_OR_MULTIPLE, ("Returns",)),
        ("yields", SectionType.SINGULAR_OR_MULTIPLE, ("Yields",)),
    )
    for title in titles
]

86 

87 

class GoogleParser:
    """Parser for Google-style docstrings."""

    def __init__(
        self, sections: Optional[list[Section]] = None, *, title_colon: bool = True
    ) -> None:
        """Set up sections.

        Parameters
        ----------
        sections : Optional[list[Section]]
            Recognized sections or None to use the defaults.
        title_colon : bool
            Require colon after section title. (Default value = True)
        """
        if not sections:
            sections = DEFAULT_SECTIONS
        self.sections = {s.title: s for s in sections}
        self.title_colon = title_colon
        self._setup()

    def _setup(self) -> None:
        """Set up parser with the colon type and title regex.

        Section titles are matched literally: each title is escaped before
        being placed into the pattern, so titles containing regex
        metacharacters (parentheses, ``+``, ``.`` ...) are still recognized.
        """
        colon = ":" if self.title_colon else ""
        # One capturing group per title, wrapped in an outer group whose
        # text (group 1) is the matched title itself.
        alternatives = "|".join(f"({re.escape(t)})" for t in self.sections)
        self.titles_re = re.compile(
            "^(" + alternatives + ")" + colon + "[ \t\r\f\v]*$",
            flags=re.M,
        )

    @staticmethod
    def _build_single_meta(section: Section, desc: str) -> DocstringMeta:
        """Build docstring element for single line sections.

        Parameters
        ----------
        section : Section
            The section that is being processed.
        desc : str
            docstring element text

        Returns
        -------
        DocstringMeta
            Docstring meta wrapper.

        Raises
        ------
        ParseError
            If the section represents a parameter section.
            In that case we would not expect to be in the single line function.
        """
        if section.key in RETURNS_KEYWORDS:
            return DocstringReturns(
                args=[section.key],
                description=desc,
                type_name=None,
                is_generator=False,
            )
        if section.key in YIELDS_KEYWORDS:
            return DocstringYields(
                args=[section.key],
                description=desc,
                type_name=None,
                is_generator=True,
            )
        if section.key in RAISES_KEYWORDS:
            return DocstringRaises(args=[section.key], description=desc, type_name=None)
        if section.key in EXAMPLES_KEYWORDS:
            return DocstringExample(args=[section.key], snippet=None, description=desc)
        if section.key in PARAM_KEYWORDS:
            msg = "Expected parameter name."
            raise ParseError(msg)
        return DocstringMeta(args=[section.key], description=desc)

    def _prepare_multi_meta(self, section: Section, text: str) -> tuple[str, str]:
        """Check text for consistency and split into before and desc.

        Parameters
        ----------
        section : Section
            The section that is being processed.
        text : str
            docstring element text

        Returns
        -------
        before : str
            The part before the colon.
        desc : str
            The description of the element.

        Raises
        ------
        ParseError
            If the text did not match the multi pattern regex.
        ParseError
            If there is no colon in the text.
        """
        if not MULTIPLE_PATTERN.match(text):
            msg = (
                "Could not match multi pattern to split "
                f"chunk part {text!r} for section {section.title}."
            )
            raise ParseError(msg)
        if ":" not in text:
            msg = f"Expected a colon in {text!r} for title {section.title}."
            raise ParseError(msg)

        # Split spec and description
        before, desc = text.split(":", 1)
        if desc:
            # Drop the single space that conventionally follows the colon.
            desc = desc[1:] if desc[0] == " " else desc
            if "\n" in desc:
                # Only the continuation lines carry indentation to remove.
                first_line, rest = desc.split("\n", 1)
                desc = first_line + "\n" + inspect.cleandoc(rest)
            desc = desc.strip("\n")
        return before, desc

    def _build_multi_meta(self, section: Section, text: str) -> DocstringMeta:
        """Build docstring element for multiline section.

        Parameters
        ----------
        section : Section
            The section that is being processed.
        text : str
            Text of a single entry of the section.

        Returns
        -------
        DocstringMeta
            docstring meta element

        Raises
        ------
        ParseError
            If the text does not look like an entry or lacks a colon ':'
        """
        before, desc = self._prepare_multi_meta(section, text)

        if section.key in PARAM_KEYWORDS:
            match = GOOGLE_TYPED_ARG_REGEX.match(before)
            if match:
                arg_name, type_name = match.group(1, 2)
                # Optionality is spelled either "type, optional" or "type?".
                if type_name.endswith(", optional"):
                    is_optional = True
                    type_name = type_name.removesuffix(", optional")
                elif type_name.endswith("?"):
                    is_optional = True
                    type_name = type_name[:-1]
                else:
                    is_optional = False
            else:
                # No "(type)" part: optionality is unknown rather than False.
                arg_name, type_name = before, None
                is_optional = None

            match = GOOGLE_ARG_DESC_REGEX.match(desc)
            default = match.group(1) if match else None

            return DocstringParam(
                args=[section.key, before],
                description=desc,
                arg_name=arg_name,
                type_name=type_name,
                is_optional=is_optional,
                default=default,
            )
        if section.key in RETURNS_KEYWORDS | YIELDS_KEYWORDS:
            match = GOOGLE_TYPED_ARG_REGEX.match(before)
            if match:
                arg_name, type_name = match.group(1, 2)
            else:
                # Without parentheses the spec is taken to be the type.
                arg_name, type_name = None, before
            if section.key in RETURNS_KEYWORDS:
                return DocstringReturns(
                    args=[section.key, arg_name or type_name],
                    description=desc,
                    return_name=arg_name,
                    type_name=type_name,
                    is_generator=False,
                )
            return DocstringYields(
                args=[section.key, arg_name or type_name],
                description=desc,
                yield_name=arg_name,
                type_name=type_name,
                is_generator=True,
            )
        if section.key in RAISES_KEYWORDS:
            return DocstringRaises(
                args=[section.key, before], description=desc, type_name=before
            )
        return DocstringMeta(args=[section.key, before], description=desc)

    def add_section(self, section: Section) -> None:
        """Add or replace a section.

        Parameters
        ----------
        section : Section
            The new section.
        """
        self.sections[section.title] = section
        # The title regex depends on the set of titles, so rebuild it.
        self._setup()

    def _split_sections(self, meta_chunk: str) -> Mapping[str, str]:
        """Split the chunk into sections as determined by the titles.

        Parameters
        ----------
        meta_chunk : str
            Part of the docstring NOT holding the description.

        Returns
        -------
        Mapping[str, str]
            Mapping between section title and part of the docstring that deals with it.
        """
        chunks: OrderedDict[str, str] = OrderedDict()
        matches = list(self.titles_re.finditer(meta_chunk))
        if not matches:
            return chunks
        # Each section body runs from the end of its title to the start of
        # the next title (or to the end of the text for the last one).
        body_ends = [m.start() for m in matches[1:]]
        body_ends.append(len(meta_chunk))
        for title_match, body_end in zip(matches, body_ends):
            title = title_match.group(1)
            if title not in self.sections:
                continue

            # Clear Any Unknown Meta
            # Ref: https://github.com/rr-/docstring_parser/issues/29
            meta_details = meta_chunk[title_match.end() : body_end]
            unknown_meta = re.search(r"\n\S", meta_details)
            if unknown_meta is not None:
                meta_details = meta_details[: unknown_meta.start()]

            chunks[title] = meta_details.strip("\n")
        return chunks

    def _determine_indent(self, chunk: str) -> str:
        """Determine indent.

        Parameters
        ----------
        chunk : str
            Chunk to determine the indent for.

        Returns
        -------
        str
            String representing the indent.

        Raises
        ------
        ParseError
            If no indent could be determined.
        """
        indent_match = re.search(r"^\s*", chunk)
        if not indent_match:
            msg = f"Can't infer indent from '{chunk}'"
            raise ParseError(msg)
        return indent_match.group()

    def _get_chunks(self, text: str) -> tuple[str, str]:
        """Split docstring into description and meta part.

        Parameters
        ----------
        text : str
            Docstring text to split.

        Returns
        -------
        tuple[str, str]
            Docstring representing the description and the rest.
        """
        # Everything before the first recognized title is the description.
        if match := self.titles_re.search(text):
            return text[: match.start()], text[match.start() :]
        return text, ""

    def _get_multi_chunk_splits(
        self, chunk: str, title: str, indent: str
    ) -> list[tuple[int, int]]:
        """Get the starting and ending position for each element of a multi chunk.

        Parameters
        ----------
        chunk : str
            Full chunk to split.
        title : str
            Title of the section represented by the chunk.
        indent : str
            Indent before each element of the chunk.

        Returns
        -------
        list[tuple[int, int]]
            List of all start and end positions of each element of the chunk.

        Raises
        ------
        ParseError
            If no entry could be found with the expected indent.
        """
        # Split based on lines which have exactly that indent
        c_matches = list(re.finditer(rf"^{indent}(?=\S)", chunk, flags=re.M))
        if not c_matches:
            msg = f'No specification for "{title}": "{chunk}"'
            raise ParseError(msg)
        c_splits = [
            (c_cur.end(), c_next.start())
            for c_cur, c_next in zip(c_matches, c_matches[1:])
        ]
        c_splits.append((c_matches[-1].end(), len(chunk)))
        return c_splits

    def parse(self, text: Optional[str]) -> Docstring:
        """Parse the Google-style docstring into its components.

        Parameters
        ----------
        text : Optional[str]
            docstring text

        Returns
        -------
        Docstring
            parsed docstring

        Raises
        ------
        ParseError
            If no specification could be found for a title, chunk pair.
        """
        ret = Docstring(style=DocstringStyle.GOOGLE)
        if not text:
            return ret

        # Clean according to PEP-0257
        text = inspect.cleandoc(text)

        desc_chunk, meta_chunk = self._get_chunks(text)

        # Break description into short and long parts
        split_description(ret, desc_chunk)

        # Split by sections determined by titles
        chunks = self._split_sections(meta_chunk)

        if not chunks:
            return ret

        # Add elements from each chunk
        for title, chunk in chunks.items():
            # Determine indent
            indent = self._determine_indent(chunk)
            section = self.sections[title]
            # Check for singular elements
            if section.type_info == SectionType.SINGULAR:
                part = inspect.cleandoc(chunk)
                ret.meta.append(self._build_single_meta(section, part))
                continue

            # Split based on lines which have exactly that indent
            c_splits = self._get_multi_chunk_splits(chunk, title, indent)
            if section.type_info == SectionType.MULTIPLE:
                for start, end in c_splits:
                    part = chunk[start:end].strip("\n")
                    ret.meta.append(self._build_multi_meta(section, part))
            else:  # SectionType.SINGULAR_OR_MULTIPLE
                # Try to handle it as a multiple section with multiple entries
                try:
                    metas = [
                        self._build_multi_meta(section, chunk[start:end].strip("\n"))
                        for start, end in c_splits
                    ]
                # Fall back to a singular entry for multi or single section
                except ParseError:
                    part = inspect.cleandoc(chunk)
                    if MULTIPLE_PATTERN.match(part):
                        ret.meta.append(self._build_multi_meta(section, part))
                    else:
                        ret.meta.append(self._build_single_meta(section, part))
                else:
                    ret.meta.extend(metas)
        return ret

480 

481 

def parse(text: Optional[str]) -> Docstring:
    """Parse a Google-style docstring using the default section set.

    Parameters
    ----------
    text : Optional[str]
        Raw docstring text; passed on unchanged to the parser.

    Returns
    -------
    Docstring
        The parsed representation produced by a fresh ``GoogleParser``.
    """
    default_parser = GoogleParser()
    return default_parser.parse(text)

496 

497 

def compose(  # noqa: PLR0915
    docstring: Docstring,
    rendering_style: RenderingStyle = RenderingStyle.COMPACT,
    # NOTE(review): whitespace runs appear collapsed in this rendering of the
    # file, so the real default may be wider than one space - confirm.
    indent: str = " ",
) -> str:
    """Render a parsed docstring into docstring text.

    Parameters
    ----------
    docstring : Docstring
        parsed docstring representation
    rendering_style : RenderingStyle
        the style to render docstrings (Default value = RenderingStyle.COMPACT)
    indent : str
        the characters used as indentation in the
        docstring string (Default value = ' ')

    Returns
    -------
    str
        docstring text
    """

    def process_one(one: MainSections) -> None:
        """Build the output text for one entry in a section.

        Parameters
        ----------
        one : MainSections
            Docstring for which to build the raw text.
        """
        head = ""

        # The entry name lives in a differently named attribute per type.
        if isinstance(one, DocstringParam):
            head += one.arg_name or ""
        elif isinstance(one, DocstringReturns):
            head += one.return_name or ""
        elif isinstance(one, DocstringYields):
            head += one.yield_name or ""

        if isinstance(one, DocstringParam) and one.is_optional:
            optional = (
                "?" if rendering_style == RenderingStyle.COMPACT else ", optional"
            )
        else:
            optional = ""

        # "name (type):" when both are known, "type:" or "name:" otherwise.
        if one.type_name and head:
            head += f" ({one.type_name}{optional}):"
        elif one.type_name:
            head += f"{one.type_name}{optional}:"
        else:
            head += ":"
        head = indent + head

        if one.description and rendering_style == RenderingStyle.EXPANDED:
            # Expanded: the description starts on its own line.
            body = f"\n{indent}{indent}".join([head, *one.description.splitlines()])
            parts.append(body)
        elif one.description:
            # Otherwise the first description line shares the head line.
            (first, *rest) = one.description.splitlines()
            body = f"\n{indent}{indent}".join([f"{head} {first}", *rest])
            parts.append(body)
        else:
            parts.append(head)

    def process_sect(name: str, args: Sequence[MainSections]) -> None:
        """Build the output for a docstring section.

        Parameters
        ----------
        name : str
            Section for which to build the output.
        args : Sequence[MainSections]
            List of individual elements of that section.
        """
        if args:
            parts.append(name)
            for arg in args:
                process_one(arg)
            # Blank line separating this section from the next one.
            parts.append("")

    parts: list[str] = []
    append_description(docstring, parts)

    process_sect("Args:", [p for p in docstring.params or [] if p.args[0] == "param"])

    process_sect(
        "Attributes:",
        [p for p in docstring.params or [] if p.args[0] == "attribute"],
    )

    process_sect(
        "Returns:",
        docstring.many_returns,
    )

    process_sect("Yields:", docstring.many_yields)

    process_sect("Raises:", docstring.raises or [])

    if docstring.returns and not docstring.many_returns:
        ret = docstring.returns
        # Pick the heading from the entry's generator flag (the entry itself
        # is always truthy here) and render it like every other section:
        # Google-style titles carry no dashed underline.
        process_sect("Yields:" if ret.is_generator else "Returns:", [ret])

    for meta in docstring.meta:
        if isinstance(
            meta, (DocstringParam, DocstringReturns, DocstringRaises, DocstringYields)
        ):
            continue  # Already handled
        parts.append(meta.args[0].replace("_", "").title() + ":")
        if meta.description:
            lines = [indent + line for line in meta.description.splitlines()]
            parts.append("\n".join(lines))
        parts.append("")

    # Drop trailing blank separators so the text does not end in empty lines.
    while parts and not parts[-1]:
        parts.pop()

    return "\n".join(parts)