Coverage for pymend\pymend.py: 63%

210 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2024-04-20 19:09 +0200

1"""Module for general management of writing docstrings of multiple files.""" 

2 

3import ast 

4import platform 

5import sys 

6import tempfile 

7import traceback 

8from dataclasses import dataclass 

9from pathlib import Path 

10from typing import NamedTuple 

11 

12from click import echo 

13 

14import pymend.docstring_parser as dsp 

15 

16from .file_parser import AstAnalyzer 

17from .output import diff 

18from .report import Changed 

19from .types import ElementDocstring, FixerSettings 

20 

21__author__ = "J-E. Nitschke" 

22__copyright__ = "Copyright 2012-2021 A. Daouzli" 

23__licence__ = "GPL3" 

24__version__ = "1.0.10" 

25__maintainer__ = "J-E. Nitschke" 

26 

27 

@dataclass
class FileContentRepresentation:
    """Hold the content of one file both line-by-line and as a single string."""

    # The content as a list of lines.
    lst: list[str]
    # The same content as one string.
    lines: str

34 

35 

class Styles(NamedTuple):
    """Pair of docstring styles: the one to read and the one to write."""

    # Style used for the input docstrings.
    input_style: dsp.DocstringStyle
    # Style used for the output docstrings.
    output_style: dsp.DocstringStyle

41 

42 

43class PyComment: 

44 """Manage several python scripts docstrings. 

45 

46 It is used to parse and rewrite in a Pythonic way all the 

47 functions', methods' and classes' docstrings. 

48 The changes are then provided in a patch file. 

49 """ 

50 

def __init__(
    self,
    input_file: Path,
    *,
    fixer_settings: FixerSettings,
    output_style: dsp.DocstringStyle = dsp.DocstringStyle.NUMPYDOC,
    input_style: dsp.DocstringStyle = dsp.DocstringStyle.AUTO,
    proceed_directly: bool = True,
) -> None:
    """Read the input file and store the configuration for fixing it.

    Parameters
    ----------
    input_file : Path
        File whose content is read (as UTF-8 text) and processed.
    fixer_settings : FixerSettings
        Settings for which fixes should be performed.
    output_style : dsp.DocstringStyle
        Output style to use for the docstrings.
        (Default value = dsp.DocstringStyle.NUMPYDOC)
    input_style : dsp.DocstringStyle
        Style of the input docstrings.
        Auto means that the style is detected automatically, which can cause
        issues when styles are mixed in examples or descriptions.
        (Default value = dsp.DocstringStyle.AUTO)
    proceed_directly : bool
        Whether `proceed` should be called right away by the constructor.
        (Default value = True)
    """
    self.input_file = input_file
    self.style = Styles(input_style, output_style)
    self.settings = fixer_settings

    # Keep the content both as one string and split into lines
    # (line endings are kept on every line).
    source_text = self.input_file.read_text(encoding="utf-8")
    self._input = FileContentRepresentation(
        lst=source_text.splitlines(keepends=True), lines=source_text
    )
    self._output = FileContentRepresentation(lst=[], lines="")

    # Results; filled in by `proceed`.
    self._changed = []
    self.docs_list = []
    self.fixed = False

    if proceed_directly:
        self.proceed()

93 

def proceed(self) -> None:
    """Parse the input, then compute and verify the rewritten output."""
    # Order matters: the element list built by `_parse` is what
    # `_compute_before_after` works on.
    self._parse()
    self._compute_before_after()

98 

def _parse(self) -> list[ElementDocstring]:
    """Analyze the input and collect its elements, ordered by their lines.

    The sorted list is also stored in `self.docs_list`.

    Returns
    -------
    list[ElementDocstring]
        Elements produced by the analyzer, sorted by their `lines` attribute.
    """
    analyzer = AstAnalyzer(self._input.lines, settings=self.settings)
    elements = list(analyzer.parse_from_ast())
    elements.sort(key=lambda element: element.lines)
    self.docs_list = elements
    return elements

112 

def _compute_before_after(self) -> tuple[list[str], list[str], list[str]]:
    """Compute the rewritten content, store it and verify it.

    The result of `_get_changes` is stored as the output and then checked
    with `assert_stability` and `assert_equality`. Only when both checks
    pass is `self.fixed` set to True.

    Returns
    -------
    tuple[list[str], list[str], list[str]]
        Tuple of lines before, lines after and names of changed elements.
    """
    original_lines, fixed_lines, changed_names = self._get_changes()

    # Publish the result before running the checks.
    self._output.lst = fixed_lines
    self._output.lines = "".join(fixed_lines)
    self._changed = changed_names

    self.assert_stability(original_lines, fixed_lines)
    self.assert_equality(self._input.lines, self._output.lines)
    self.fixed = True
    return original_lines, fixed_lines, changed_names

134 

def _get_changes(self) -> tuple[list[str], list[str], list[str]]:
    r"""Build the file content with every docstring replaced by its new version.

    The lines are expected to already carry their '\n' at the end.

    Returns
    -------
    list_from : list[str]
        Original file as list of lines.
    list_to : list[str]
        Modified content as list of lines.
    list_changed : list[str]
        Names of the elements whose docstring changed.

    Raises
    ------
    ValueError
        If the end line of an element's docstring is None.
    """
    source_lines = self._input.lst
    result_lines: list[str] = []
    changed_names: list[str] = []
    # Index of the first source line that was not yet copied to the result.
    cursor = 0
    for element in self.docs_list:
        first_line, last_line = element.lines
        if last_line is None:
            log = self.dump_to_file(
                "INTERNAL ERROR: End of docstring is None."
                " Not sure what to do with this yet.",
                "Original file:.\n",
                "".join(source_lines),
                "Problematic element:\n",
                repr(element),
            )
            msg = (
                "INTERNAL ERROR: End of docstring is None."
                " Not sure what to do with this yet."
                " Please report a bug on"
                " https://github.com/JanEricNitschke/pymend/issues."
                f" This diff might be helpful: {log}"
            )
            raise ValueError(msg)
        # Element line numbers are 1-based, list indices are 0-based.
        start = first_line - 1
        end = last_line - 1

        in_docstring = element.docstring
        # `start` is the first docstring line if there was a docstring,
        # otherwise the line where the new docstring has to be inserted.
        old_line = source_lines[start]
        leading_whitespace = old_line[: -len(old_line.lstrip())]
        trailing_comment = self._get_trailing_comment(source_lines[end])
        rendered = element.output_docstring(
            output_style=self.style.output_style,
            input_style=self.style.input_style,
            settings=self.settings,
        )
        out_docstring = self._finalizes(
            docstring=rendered,
            indentation=leading_whitespace,
            modifier=element.modifier,
            trailing=trailing_comment,
        )

        # The finalized docstring carries indentation, modifier, quotes and
        # the trailing comment. Cut those off again to compare it against
        # the bare input docstring.
        prefix_length = 3 + len(element.modifier)
        suffix_length = 3 + len(trailing_comment)
        bare_output = out_docstring.strip()[prefix_length:-suffix_length]
        if in_docstring != bare_output:
            changed_names.append(element.name)

        # Everything between the previous docstring and this one is unchanged.
        result_lines.extend(source_lines[cursor:start])
        result_lines.extend(out_docstring.splitlines(keepends=True))
        # Without an old docstring the line at `start` is regular code that
        # must not be dropped.
        if not in_docstring:
            result_lines.append(old_line)
        cursor = end + 1
    # Copy whatever follows the last docstring (empty slice if nothing is left).
    result_lines.extend(source_lines[cursor:])
    return source_lines, result_lines, changed_names

231 

def _get_trailing_comment(self, line: str) -> str:
    """Return whatever follows the last triple quote on the given line.

    Parameters
    ----------
    line : str
        The last line of the docstring.

    Returns
    -------
    str
        Text after the last triple quote of the stripped line,
        or an empty string if the line contains no triple quote.
    """
    # Takes the right-most triple quote of either kind. This might need
    # some work in the future if both kinds appear in the same line.
    stripped = line.strip()
    quote_end = max(stripped.rfind(marker) for marker in ('"""', "'''"))
    return "" if quote_end < 0 else stripped[quote_end + 3 :]

252 

def _finalizes(
    self,
    docstring: str,
    quotes: str = '"""',
    indentation: str = " ",
    modifier: str = "",
    trailing: str = "",
) -> str:
    r"""Wrap the docstring in quotes and apply modifier and indentation.

    Parameters
    ----------
    docstring : str
        The raw docstring to complete.
    quotes : str
        Quotes to use for the docstring. (Default value = '\"\"\"')
    indentation : str
        Prefix put in front of every non-blank line. (Default value = ' ')
    modifier : str
        Modifier to put before the opening triple quotes.
        Any combination of ("r", "f", "u") (Default value = '')
    trailing : str
        Text appended directly after the closing quotes, i.e. the comment
        that followed the original docstring on the same line.
        (Default value = '')

    Returns
    -------
    str
        The indented docstring, wrapped in quotes, preceded by the modifier
        and terminated by a newline.
    """
    pieces = f"{modifier}{quotes}{docstring}".splitlines()
    if len(pieces) <= 1:
        # One-line docstring: closing quotes go on the same line.
        pieces[0] += quotes
    else:
        # Multi-line docstring: closing quotes get a line of their own.
        pieces.append(quotes)
    # Blank lines are left untouched so they do not gain trailing whitespace.
    indented = [
        indentation + piece if piece.strip() else piece for piece in pieces
    ]
    return "\n".join(indented) + trailing + "\n"

295 

def assert_stability(self, src: list[str], dst: list[str]) -> None:
    """Check that a second pass over the produced output changes nothing.

    Parameters
    ----------
    src : list[str]
        List of lines from the input file.
    dst : list[str]
        List of lines that pymend produced.

    Raises
    ------
    AssertionError
        If a second run of pymend produces a different output than the first.
    """
    # pylint: disable=protected-access
    second_pass = self.__copy_from_output()
    second_pass._parse()  # noqa: SLF001
    before, after, changed = second_pass._get_changes()  # noqa: SLF001
    # Stable means: nothing reported as changed and the second pass both
    # started from and ended with exactly the first pass output.
    if not changed and dst == before and dst == after:
        return
    log = self.dump_to_file(
        "INTERNAL ERROR: PyMend produced different "
        "docstrings on the second pass.\n"
        "Changed:\n",
        "\n".join(changed),
        "".join(diff(src, dst, "source", "first pass")),
        "".join(diff(dst, after, "first pass", "second pass")),
    )
    msg = (
        "INTERNAL ERROR:"
        " PyMend produced different docstrings on the second pass."
        " Please report a bug on"
        " https://github.com/JanEricNitschke/pymend/issues."
        f" This diff might be helpful: {log}"
    )
    raise AssertionError(msg)

332 

def assert_equality(self, src_lines: str, dst_lines: str) -> None:
    """Check that the produced code has the same ast as the input.

    Both contents are parsed and compared via `_stringify_ast`,
    i.e. with the docstrings removed.

    Parameters
    ----------
    src_lines : str
        Lines from the input file.
    dst_lines : str
        Lines that pymend produced.

    Raises
    ------
    AssertionError
        If the content of the input file could not be parsed into an ast.
    AssertionError
        If the output from pymend could not be parsed into an ast.
    AssertionError
        If the output from pymend produces a different (reduced) ast
        than the input.
    """
    try:
        source_tree = ast.parse(src_lines)
    except Exception as exc:  # noqa: BLE001
        msg = f"Failed to parse source file AST: {exc}\n"
        raise AssertionError(msg) from exc

    try:
        produced_tree = ast.parse(dst_lines)
    except Exception as exc:  # noqa: BLE001
        # The invalid output is written to a log file for the bug report.
        log = self.dump_to_file(
            "INTERNAL ERROR: PyMend produced invalid code:\n",
            "".join(traceback.format_tb(exc.__traceback__)),
            dst_lines,
        )
        msg = (
            f"INTERNAL ERROR: PyMend produced invalid code: {exc}. "
            "Please report a bug on"
            " https://github.com/JanEricNitschke/pymend/issues."
            f" This invalid output might be helpful: {log}"
        )
        raise AssertionError(msg) from None

    source_dump = self._stringify_ast(source_tree)
    produced_dump = self._stringify_ast(produced_tree)
    if source_dump == produced_dump:
        return
    log = self.dump_to_file(
        "INTERNAL ERROR: PyMend produced code "
        "that is not equivalent to the source\n",
        "".join(diff(source_dump, produced_dump, "src", "dst")),
    )
    msg = (
        "INTERNAL ERROR: PyMend produced code that is not equivalent to the"
        " source. Please report a bug on "
        "https://github.com/JanEricNitschke/pymend/issues."
        f" This diff might be helpful: {log}"
    )
    raise AssertionError(msg) from None

391 

def __copy_from_output(self) -> "PyComment":
    """Create a new PyComment whose input is the output of this instance.

    The constructor is bypassed so that no file is read and nothing is
    processed. Styles, settings and the input file path are shared with
    this instance.

    Returns
    -------
    'PyComment'
        New instance using this instance's output as its input,
        with empty output and no results yet.
    """
    # pylint: disable=protected-access
    py_comment = PyComment.__new__(PyComment)
    py_comment.input_file = self.input_file
    py_comment.style = self.style
    py_comment.settings = self.settings
    py_comment._input = FileContentRepresentation(  # noqa: SLF001
        self._output.lst.copy(), self._output.lines
    )
    py_comment._output = FileContentRepresentation([], "")  # noqa: SLF001
    # Set every attribute the constructor sets, so that no method of the
    # copy fails with an AttributeError.
    py_comment._changed = []  # noqa: SLF001
    py_comment.docs_list = []
    py_comment.fixed = False
    return py_comment

417 

def _strip_ast(self, ast_node: ast.AST) -> None:
    """Remove the docstrings of modules, classes and functions in place.

    Parameters
    ----------
    ast_node : ast.AST
        Root node of the ast to strip.
    """
    docstring_owners = (
        ast.FunctionDef,
        ast.ClassDef,
        ast.AsyncFunctionDef,
        ast.Module,
    )
    for node in ast.walk(ast_node):
        if not isinstance(node, docstring_owners):
            continue
        body = node.body
        if not body:
            continue
        head = body[0]
        # A docstring is an expression statement made of a string constant
        # in first position of the body.
        if (
            isinstance(head, ast.Expr)
            and isinstance(head.value, ast.Constant)
            and isinstance(head.value.value, str)
        ):
            node.body = body[1:]

446 

def _stringify_ast(self, node: ast.AST) -> list[str]:
    """Dump the ast as lines of text after removing all docstrings.

    The docstrings are removed from `node` itself.

    Parameters
    ----------
    node : ast.AST
        Node to turn into a reduced string representation.

    Returns
    -------
    list[str]
        Lines (line endings kept) of the dump of the reduced ast.
    """
    self._strip_ast(node)
    dumped = ast.dump(node, indent=1)
    return dumped.splitlines(keepends=True)

462 

def dump_to_file(self, *output: str, ensure_final_newline: bool = True) -> str:
    """Write all given strings to a new temporary file and return its path.

    The file is not deleted automatically.

    Parameters
    ----------
    *output : str
        Strings to write to the file, in order.
    ensure_final_newline : bool
        Whether a newline should be written after every non-empty string
        that does not already end in one. (Default value = True)

    Returns
    -------
    str
        Path to the produced temp file.
    """
    log_file = tempfile.NamedTemporaryFile(
        mode="w", prefix="blk_", suffix=".log", delete=False, encoding="utf8"
    )
    with log_file as f:
        for chunk in output:
            f.write(chunk)
            needs_newline = bool(chunk) and chunk[-1] != "\n"
            if ensure_final_newline and needs_newline:
                f.write("\n")
    return f.name

487 

def _docstring_diff(self) -> list[str]:
    """Diff the input lines against the output lines.

    Both sides are labelled with the input file path,
    prefixed with "a/" and "b/" respectively.

    Returns
    -------
    list[str]
        The resulting diff
    """
    path = self.input_file
    return diff(self._input.lst, self._output.lst, f"a/{path}", f"b/{path}")

502 

def output_patch(self) -> Changed:
    """Output the patch, either to stdout or to a patch file.

    The file is processed first if that has not happened yet.
    Nothing is written if no element changed. The patch goes to stdout
    if the name of the input file is "-", otherwise to a patch file.

    Returns
    -------
    Changed
        Whether there were any changes.
    """
    if not self.fixed:
        self.proceed()
    if not self._changed:
        return Changed.NO

    patch_lines = self._get_patch_lines()
    if self.input_file.name == "-":
        sys.stdout.writelines(patch_lines)
    else:
        self._write_patch_file(patch_lines)
    return Changed.YES

521 

def output_fix(self) -> Changed:
    """Output the fixed content, either to stdout or into the input file.

    The file is processed first if that has not happened yet.
    The content goes to stdout if the name of the input file is "-".
    Otherwise the input file is overwritten, but only if the content
    actually differs.

    Returns
    -------
    Changed
        Whether there were any changes.

    Raises
    ------
    AssertionError
        If the list of changed elements disagrees with whether the input
        and output contents differ.
    """
    if not self.fixed:
        self.proceed()

    unchanged_content = self._input.lines == self._output.lines
    # Sanity check: content is identical exactly when no element changed.
    if unchanged_content != (not self._changed):
        log = self.dump_to_file(
            "INTERNAL ERROR: "
            "Elements having changed does not line up with list of changed "
            "elements.\n",
            "List of changed elements:\n",
            "\n".join(self._changed),
            "Diff\n",
            "".join(self._docstring_diff()),
        )
        msg = (
            "INTERNAL ERROR: "
            "Elements having changed does not line up with list of changed"
            " elements."
            " Please report a bug on"
            " https://github.com/JanEricNitschke/pymend/issues."
            f" This invalid output might be helpful: {log}"
        )
        raise AssertionError(msg)

    if self.input_file.name == "-":
        sys.stdout.writelines(self._output.lst)
    elif not unchanged_content:
        plural = "s" if len(self._changed) > 1 else ""
        names = ", ".join(self._changed)
        echo(
            f"Modified docstrings of element{plural} "
            f"({names}) in file {self.input_file}."
        )
        self._overwrite_source_file()
    return Changed.YES if self._changed else Changed.NO

567 

def _get_patch_lines(self) -> list[str]:
    r"""Return the docstring diff preceded by a header naming the version.

    Returns
    -------
    list[str]
        Header line followed by the lines of the diff.
    """
    header = f"# Patch generated by Pymend v{__version__}\n\n"
    return [header, *self._docstring_diff()]

587 

def _write_patch_file(self, lines_to_write: list[str]) -> None:
    r"""Write the lines to a file named like the input file plus ".patch".

    Only the name of the input file is used, not its directory,
    so the resulting path is relative.

    Parameters
    ----------
    lines_to_write : list[str]
        lines to write to the file - they should be \n terminated
    """
    # Change this if pathlib ever gets a `append_suffix` method
    # To Path(self.input_file).append_suffix(".patch")
    input_name = Path(self.input_file).name
    patch_path = Path(f"{input_name}.patch")
    with patch_path.open("w", encoding="utf-8") as file:
        file.writelines(lines_to_write)

602 

def _overwrite_source_file(self) -> None:
    """Overwrite the input file with the produced output.

    The output is first written to a temporary file next to the input file
    (same path with ".writing" appended) which is then renamed over the
    input file. If writing fails, the temporary file is removed and the
    original error is raised.
    """
    tmp_filename = Path(f"{self.input_file}.writing")
    try:
        with tmp_filename.open("w", encoding="utf-8") as file:
            # The output is a single string: write it in one go instead of
            # character by character via `writelines`.
            file.write(self._output.lines)
    except BaseException:
        # The temporary file might never have been created (e.g. missing
        # directory). Do not let the cleanup hide the actual error.
        tmp_filename.unlink(missing_ok=True)
        raise
    if platform.system() == "Windows":
        self._windows_rename(tmp_filename)
    else:
        tmp_filename.rename(self.input_file)

625 

def _windows_rename(self, tmp_filename: Path) -> None:
    """Move the temporary file over the input file.

    A plain rename raises an OSError on Windows if the target exists.
    `Path.replace` overwrites an existing target, so the input file does
    not have to be deleted first and is not lost if the move fails.

    Parameters
    ----------
    tmp_filename : Path
        The file to rename
    """
    tmp_filename.replace(Path(self.input_file))

638 

def report_issues(self) -> tuple[int, str]:
    """Collect the issue reports of all elements in the file.

    Returns
    -------
    tuple[int, str]
        The number of elements that had issues as well as
        a string representation of those. `(0, "")` if there were none.
    """
    element_reports = (elem.report_issues() for elem in self.docs_list)
    # Only keep the reports of elements that actually have issues.
    reports: list[str] = [text for count, text in element_reports if count]
    if not reports:
        return 0, ""
    banner = "*" * 50
    header = f"{banner}\nThe following issues were found in file {self.input_file}:\n"
    return len(reports), header + "\n".join(reports)