Coverage for src\llm_code_lens\analyzer\python.py: 80%
288 statements
« prev ^ index » next coverage.py v7.6.1, created at 2025-01-12 10:23 +0200
1import ast
2from pathlib import Path
3from typing import Dict, List, Optional, Set, Tuple, Union
4from dataclasses import dataclass
5from .base import BaseAnalyzer
@dataclass
class CodeLocation:
    """Start (and optionally end) position of a construct in source code.

    ``line`` and ``column`` are required; ``end_line`` and ``end_column``
    default to ``None`` when the end position is not supplied.
    """

    # Start position.
    line: int
    column: int
    # End position; both fields are optional.
    end_line: Optional[int] = None
    end_column: Optional[int] = None
@dataclass
class ImportInfo:
    """Structured description of a single imported name."""

    # The imported name itself.
    name: str
    # Name it is bound to via ``as``, or None when there is no alias.
    alias: Optional[str]
    # Source module for ``from ... import`` forms, otherwise None.
    module: Optional[str]
    # Whether the import is a relative one.
    is_relative: bool
    # Where the import statement sits in the file.
    location: CodeLocation
@dataclass
class FunctionArgument:
    """One parameter of a function signature.

    The three boolean flags default to ``False``; the annotation and
    default value are stored as already-rendered strings (or ``None``).
    """

    name: str
    # Rendered type annotation, or None when the parameter is unannotated.
    type_annotation: Optional[str]
    # Rendered default value, or None when the parameter has no default.
    default_value: Optional[str]
    # Parameter kind flags.
    is_kwonly: bool = False
    is_vararg: bool = False
    is_kwarg: bool = False
@dataclass
class FunctionInfo:
    """Everything the analyzer records about one function definition."""

    name: str
    # Parameters in signature order.
    args: List[FunctionArgument]
    # Rendered return annotation, or None when absent.
    return_type: Optional[str]
    docstring: Optional[str]
    # Rendered decorator expressions.
    decorators: List[str]
    is_async: bool
    location: CodeLocation
    # Complexity score computed by the analyzer.
    complexity: int
    # Size figure computed by the analyzer.
    loc: int
@dataclass
class ClassInfo:
    """Detailed information about a class."""

    name: str
    # Rendered base-class expressions.
    bases: List[str]
    # One dict per method, as produced by
    # ``PythonAnalyzer._extract_class_methods`` (which returns ``List[Dict]``,
    # not a list of names).
    methods: List[Dict]
    docstring: Optional[str]
    # Rendered decorator expressions.
    decorators: List[str]
    location: CodeLocation
    # Complexity score computed by the analyzer.
    complexity: int
58class PythonAnalyzer(BaseAnalyzer):
59 """Python-specific code analyzer using AST with enhanced features."""
61 def analyze_file(self, file_path: Path) -> dict:
62 """
63 Analyze a Python file and return detailed analysis results.
65 Args:
66 file_path: Path to the Python file to analyze.
68 Returns:
69 dict: Comprehensive analysis results including:
70 - Imports
71 - Functions (with args, types, etc.)
72 - Classes (with inheritance, methods)
73 - Docstrings and comments
74 - Complexity metrics
75 - TODOs and other markers
76 """
77 try:
78 with open(file_path, 'r', encoding='utf-8') as f:
79 content = f.read()
81 tree = ast.parse(content)
83 # Initialize analysis dictionary
84 analysis = {
85 'type': 'python',
86 'full_content': content,
87 'imports': [],
88 'functions': [],
89 'classes': [],
90 'comments': [],
91 'todos': [],
92 'metrics': {
93 'loc': len(content.splitlines()),
94 'classes': 0,
95 'functions': 0,
96 'imports': 0,
97 'complexity': 0
98 }
99 }
101 # Process each component
102 self._process_imports(tree, analysis)
103 self._process_functions(tree, analysis, content)
104 self._process_classes(tree, analysis, content)
105 self._process_comments(content, analysis)
107 # Calculate overall complexity
108 analysis['metrics']['complexity'] = self._calculate_module_complexity(tree)
110 return analysis
112 except SyntaxError as e:
113 # Return partial analysis for syntax errors
114 return {
115 'type': 'python',
116 'errors': [{
117 'type': 'syntax_error',
118 'line': e.lineno,
119 'offset': e.offset,
120 'text': str(e)
121 }],
122 'metrics': {
123 'loc': 0,
124 'classes': 0,
125 'functions': 0,
126 'imports': 0,
127 'complexity': 0
128 }
129 }
130 except Exception as e:
131 # Handle other errors gracefully
132 return {
133 'type': 'python',
134 'errors': [{
135 'type': 'analysis_error',
136 'text': str(e)
137 }],
138 'metrics': {
139 'loc': 0,
140 'classes': 0,
141 'functions': 0,
142 'imports': 0,
143 'complexity': 0
144 }
145 }
147 def _process_imports(self, tree: ast.AST, analysis: dict) -> None:
148 """Process imports and handle each import statement individually."""
149 unique_imports = set()
150 import_count = 0
152 for node in ast.walk(tree):
153 if isinstance(node, ast.Import):
154 for name in node.names:
155 import_count += 1
156 unique_imports.add(f"import {name.name}")
157 elif isinstance(node, ast.ImportFrom):
158 module = node.module or ''
159 level = '.' * node.level
161 # Group imports from same module together
162 for name in node.names:
163 import_count += 1
164 if name.asname:
165 unique_imports.add(f"from {level}{module} import {name.name} as {name.asname}")
166 else:
167 unique_imports.add(f"from {level}{module} import {name.name}")
169 analysis['metrics']['imports'] = import_count
170 analysis['imports'] = sorted(list(unique_imports))
176 def _process_functions(self, tree: ast.AST, analysis: dict, content: str) -> None:
177 """Extract and analyze function definitions."""
178 for node in ast.walk(tree):
179 if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
180 analysis['metrics']['functions'] += 1
182 # Extract function information
183 func_info = FunctionInfo(
184 name=node.name,
185 args=self._extract_function_args(node.args),
186 return_type=self._format_annotation(node.returns) if node.returns else None,
187 docstring=ast.get_docstring(node),
188 decorators=[self._format_decorator(d) for d in node.decorator_list],
189 is_async=isinstance(node, ast.AsyncFunctionDef),
190 location=CodeLocation(
191 line=node.lineno,
192 column=node.col_offset,
193 end_line=node.end_lineno,
194 end_column=node.end_col_offset
195 ),
196 complexity=self._calculate_function_complexity(node),
197 loc=len(node.body)
198 )
200 # Get function content
201 func_content = self._extract_source(content, func_info.location)
203 # Add to analysis
204 analysis['functions'].append({
205 'name': func_info.name,
206 'args': [self._format_argument(arg) for arg in func_info.args],
207 'return_type': func_info.return_type,
208 'docstring': func_info.docstring,
209 'decorators': func_info.decorators,
210 'is_async': func_info.is_async,
211 'content': func_content,
212 'loc': func_info.loc,
213 'line_number': func_info.location.line,
214 'complexity': func_info.complexity
215 })
217 def _extract_function_args(self, args: ast.arguments) -> List[FunctionArgument]:
218 """Extract function arguments with improved handling."""
219 arguments = []
221 # Handle positional-only arguments (Python 3.8+)
222 if hasattr(args, 'posonlyargs'):
223 for arg in args.posonlyargs:
224 arguments.append(self._create_argument(arg))
226 # Handle regular positional arguments
227 for arg in args.args:
228 # Skip self/cls for methods
229 if arg.arg in ('self', 'cls') and len(args.args) > 0:
230 continue
231 arguments.append(self._create_argument(arg))
233 # Add defaults for positional arguments
234 defaults_start = len(arguments) - len(args.defaults)
235 for i, default in enumerate(args.defaults):
236 if i + defaults_start >= 0: # Ensure valid index
237 arguments[defaults_start + i].default_value = self._format_annotation(default)
239 # Handle *args
240 if args.vararg:
241 arguments.append(FunctionArgument(
242 name=f"*{args.vararg.arg}",
243 type_annotation=self._format_annotation(args.vararg.annotation) if args.vararg.annotation else None,
244 default_value=None,
245 is_vararg=True
246 ))
248 # Handle keyword-only arguments
249 for arg in args.kwonlyargs:
250 arguments.append(self._create_argument(arg, is_kwonly=True))
252 # Add defaults for keyword-only arguments
253 for i, default in enumerate(args.kw_defaults):
254 if default and i < len(args.kwonlyargs):
255 arg_idx = len(arguments) - len(args.kw_defaults) + i
256 if arg_idx >= 0: # Ensure valid index
257 arguments[arg_idx].default_value = self._format_annotation(default)
259 # Handle **kwargs
260 if args.kwarg:
261 arguments.append(FunctionArgument(
262 name=f"**{args.kwarg.arg}",
263 type_annotation=self._format_annotation(args.kwarg.annotation) if args.kwarg.annotation else None,
264 default_value=None,
265 is_kwarg=True
266 ))
268 return arguments
270 def _create_argument(self, arg: ast.arg, is_kwonly: bool = False) -> FunctionArgument:
271 """Helper to create a FunctionArgument instance."""
272 return FunctionArgument(
273 name=arg.arg,
274 type_annotation=self._format_annotation(arg.annotation) if arg.annotation else None,
275 default_value=None,
276 is_kwonly=is_kwonly
277 )
280 def _process_classes(self, tree: ast.AST, analysis: dict, content: str) -> None:
281 """Extract and analyze class definitions."""
282 for node in ast.walk(tree):
283 if isinstance(node, ast.ClassDef):
284 analysis['metrics']['classes'] += 1
286 # Get class information
287 class_info = ClassInfo(
288 name=node.name,
289 bases=self._extract_base_classes(node),
290 methods=self._extract_class_methods(node),
291 docstring=ast.get_docstring(node),
292 decorators=[self._format_decorator(d) for d in node.decorator_list],
293 location=CodeLocation(
294 line=node.lineno,
295 column=node.col_offset,
296 end_line=node.end_lineno,
297 end_column=node.end_col_offset
298 ),
299 complexity=self._calculate_class_complexity(node)
300 )
302 # Add to analysis
303 analysis['classes'].append({
304 'name': class_info.name,
305 'bases': class_info.bases,
306 'methods': class_info.methods,
307 'docstring': class_info.docstring,
308 'decorators': class_info.decorators,
309 'line_number': class_info.location.line,
310 'complexity': class_info.complexity
311 })
313 def _extract_class_methods(self, node: ast.ClassDef) -> List[Dict]:
314 """Extract detailed method information from a class."""
315 methods = []
317 for item in node.body:
318 if isinstance(item, ast.FunctionDef):
319 method_info = {
320 'name': item.name,
321 'docstring': ast.get_docstring(item),
322 'decorators': [self._format_decorator(d) for d in item.decorator_list],
323 'is_property': self._is_property(item),
324 'is_classmethod': self._is_classmethod(item),
325 'is_staticmethod': self._is_staticmethod(item),
326 'line_number': item.lineno
327 }
328 methods.append(method_info)
330 return methods
332 def _process_comments(self, content: str, analysis: dict) -> None:
333 """Extract and categorize comments and TODOs."""
334 lines = content.split('\n')
336 # Track multiline strings/comments
337 in_multiline = False
338 multiline_content = []
339 multiline_start = 0
341 for i, line in enumerate(lines, 1):
342 stripped = line.strip()
344 # Handle multiline strings that might be docstrings
345 if stripped.startswith('"""') or stripped.startswith("'''"):
346 if not in_multiline and not (stripped.endswith('"""') or stripped.endswith("'''")):
347 in_multiline = True
348 multiline_start = i
349 multiline_content = [stripped]
350 continue
351 elif in_multiline:
352 in_multiline = False
353 multiline_content.append(stripped)
354 # Only process if it's a comment, not a docstring
355 if not self._is_docstring(content, multiline_start):
356 comment_text = '\n'.join(multiline_content)
357 self._add_comment_or_todo(comment_text, multiline_start, analysis)
358 continue
360 if in_multiline:
361 multiline_content.append(stripped)
362 continue
364 # Handle single line comments
365 if stripped.startswith('#'):
366 comment_text = stripped[1:].strip()
367 self._add_comment_or_todo(comment_text, i, analysis)
369 def _calculate_function_complexity(self, node: ast.FunctionDef) -> int:
370 """Calculate cyclomatic complexity for a function."""
371 complexity = 1 # Base complexity
373 for child in ast.walk(node):
374 # Control flow increases complexity
375 if isinstance(child, (ast.If, ast.While, ast.For, ast.AsyncFor)):
376 complexity += 1
377 elif isinstance(child, ast.ExceptHandler):
378 complexity += 1
379 elif isinstance(child, ast.BoolOp):
380 if isinstance(child.op, ast.And):
381 complexity += len(child.values) - 1
382 elif isinstance(child, ast.Return):
383 if isinstance(child.value, ast.IfExp):
384 complexity += 1
386 return complexity
388 def _calculate_class_complexity(self, node: ast.ClassDef) -> int:
389 """Calculate complexity for a class."""
390 complexity = len(node.bases) # Inheritance adds complexity
392 # Add complexity of methods
393 for child in node.body:
394 if isinstance(child, ast.FunctionDef):
395 complexity += self._calculate_function_complexity(child)
397 return complexity
399 def _calculate_module_complexity(self, tree: ast.AST) -> int:
400 """Calculate overall module complexity."""
401 complexity = 0
403 # Add complexity of all functions and classes
404 for node in ast.walk(tree):
405 if isinstance(node, ast.FunctionDef):
406 complexity += self._calculate_function_complexity(node)
407 elif isinstance(node, ast.ClassDef):
408 complexity += self._calculate_class_complexity(node)
410 return complexity
412 def _extract_base_classes(self, node: ast.ClassDef) -> List[str]:
413 """Extract and format base class information."""
414 bases = []
415 for base in node.bases:
416 if isinstance(base, ast.Name):
417 bases.append(base.id)
418 elif isinstance(base, ast.Attribute):
419 bases.append(f"{self._format_dotted_name(base)}")
420 elif isinstance(base, ast.Call):
421 # Handle metaclasses and parameterized bases
422 if isinstance(base.func, ast.Name):
423 bases.append(f"{base.func.id}(...)")
424 elif isinstance(base.func, ast.Attribute):
425 bases.append(f"{self._format_dotted_name(base.func)}(...)")
426 return bases
428 def _format_dotted_name(self, node: ast.Attribute) -> str:
429 """Format attribute access into dotted name."""
430 parts = []
431 current = node
432 while isinstance(current, ast.Attribute):
433 parts.append(current.attr)
434 current = current.value
435 if isinstance(current, ast.Name):
436 parts.append(current.id)
437 return '.'.join(reversed(parts))
439 def _format_annotation(self, node: Optional[ast.AST]) -> Optional[str]:
440 """Format type annotations into string representation."""
441 if node is None:
442 return None
444 if isinstance(node, ast.Name):
445 return node.id
446 elif isinstance(node, ast.Attribute):
447 return self._format_dotted_name(node)
448 elif isinstance(node, ast.Subscript):
449 value = self._format_annotation(node.value)
450 if isinstance(node.slice, ast.Index):
451 # Handle Python 3.8 style annotations
452 slice_value = self._format_annotation(node.slice.value)
453 else:
454 # Handle Python 3.9+ style annotations
455 slice_value = self._format_annotation(node.slice)
456 return f"{value}[{slice_value}]"
457 elif isinstance(node, ast.Tuple):
458 elements = [self._format_annotation(elt) for elt in node.elts]
459 return f"Tuple[{', '.join(elements)}]"
460 elif isinstance(node, ast.List):
461 elements = [self._format_annotation(elt) for elt in node.elts]
462 return f"List[{', '.join(elements)}]"
463 elif isinstance(node, ast.Constant):
464 return repr(node.value)
465 elif isinstance(node, ast.BinOp):
466 if isinstance(node.op, ast.BitOr):
467 left = self._format_annotation(node.left)
468 right = self._format_annotation(node.right)
469 return f"Union[{left}, {right}]"
470 elif isinstance(node, ast.Index):
471 # Handle Python 3.8 style index nodes directly
472 return self._format_annotation(node.value)
473 return str(node)
477 def _format_import(self, import_info: ImportInfo) -> str:
478 """Format import information into string representation."""
479 if import_info.module:
480 result = f"from {import_info.module} import {import_info.name}"
481 else:
482 result = f"import {import_info.name}"
484 if import_info.alias:
485 result += f" as {import_info.alias}"
487 return result
489 def _format_argument(self, arg: FunctionArgument) -> str:
490 """Format function argument into string representation."""
491 parts = []
493 # Handle special argument types
494 if arg.is_vararg:
495 parts.append('*' + arg.name)
496 elif arg.is_kwarg:
497 parts.append('**' + arg.name)
498 else:
499 parts.append(arg.name)
501 # Add type annotation if present
502 if arg.type_annotation:
503 parts[0] += f": {arg.type_annotation}"
505 # Add default value if present
506 if arg.default_value:
507 parts[0] += f" = {arg.default_value}"
509 return parts[0]
511 def _format_decorator(self, node: ast.expr) -> str:
512 """Format decorator into string representation."""
513 if isinstance(node, ast.Name):
514 return node.id
515 elif isinstance(node, ast.Call):
516 if isinstance(node.func, ast.Name):
517 return f"{node.func.id}(...)"
518 elif isinstance(node.func, ast.Attribute):
519 return f"{self._format_dotted_name(node.func)}(...)"
520 elif isinstance(node, ast.Attribute):
521 return self._format_dotted_name(node)
522 return "unknown_decorator"
524 def _extract_source(self, content: str, location: CodeLocation) -> str:
525 """Extract source code for a node based on its location."""
526 lines = content.splitlines()
527 if location.end_line:
528 return '\n'.join(lines[location.line-1:location.end_line])
529 return lines[location.line-1]
531 def _is_docstring(self, content: str, line_number: int) -> bool:
532 """Check if a multiline string is a docstring."""
533 lines = content.splitlines()
535 # Look for the previous non-empty line
536 current_line = line_number - 2 # -2 because line_number is 1-based
537 while current_line >= 0 and not lines[current_line].strip():
538 current_line -= 1
540 if current_line < 0:
541 return True # Module-level docstring
543 prev_line = lines[current_line].strip()
544 return prev_line.endswith(':') or prev_line.startswith('@')
546 def _add_comment_or_todo(self, text: str, line: int, analysis: dict) -> None:
547 """Add a comment as either a regular comment or TODO based on content."""
548 text = text.strip()
549 if any(marker in text.upper() for marker in ['TODO', 'FIXME', 'XXX']):
550 analysis['todos'].append({
551 'text': text,
552 'line': line
553 })
554 else:
555 analysis['comments'].append({
556 'text': text,
557 'line': line
558 })
560 def _is_property(self, node: ast.FunctionDef) -> bool:
561 """Check if a method is a property."""
562 return any(
563 self._format_decorator(d) in {'property', 'cached_property'}
564 for d in node.decorator_list
565 )
567 def _is_classmethod(self, node: ast.FunctionDef) -> bool:
568 """Check if a method is a classmethod."""
569 return any(
570 self._format_decorator(d) == 'classmethod'
571 for d in node.decorator_list
572 )
574 def _is_staticmethod(self, node: ast.FunctionDef) -> bool:
575 """Check if a method is a staticmethod."""
576 return any(
577 self._format_decorator(d) == 'staticmethod'
578 for d in node.decorator_list
579 )