Coverage for src\llm_code_lens\analyzer\base.py: 82% (134 statements)
coverage.py v7.6.1, created at 2025-01-12 10:23 +0200
from abc import ABC, abstractmethod
from pathlib import Path
from typing import Dict, List
from dataclasses import dataclass

@dataclass
class AnalysisResult:
    """Container for analysis results."""
    summary: dict
    insights: List[str]
    files: Dict[str, dict]

    def to_text(self) -> str:
        """Convert analysis to LLM-friendly text format."""
        from ..formatters.llm import format_analysis
        return format_analysis(self)

    def to_json(self) -> str:
        """Convert analysis to JSON format."""
        import json
        return json.dumps({
            'summary': self.summary,
            'insights': self.insights,
            'files': self.files
        }, indent=2)
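
# Illustrative usage (the field values below are made up, not taken from a
# real run): a populated AnalysisResult can be rendered either way.
#
#   result = AnalysisResult(
#       summary={'project_stats': {'total_files': 1}},
#       insights=['Project contains 1 analyzable files'],
#       files={'src/app.py': {'type': 'python', 'metrics': {'loc': 42}}},
#   )
#   result.to_json()   # JSON string with 2-space indentation
#   result.to_text()   # LLM-friendly text via formatters.llm.format_analysis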

class BaseAnalyzer(ABC):
    """Base class for all code analyzers."""

    @abstractmethod
    def analyze_file(self, file_path: Path) -> dict:
        """
        Analyze a file and return standardized analysis results.

        Args:
            file_path: Path to the file to analyze.

        Returns:
            dict with the following structure:
            {
                'type': str,              # Analyzer type (e.g., 'python', 'sql')
                'metrics': {
                    'loc': int,           # Lines of code
                    'classes': int,       # Number of classes
                    'functions': int,     # Number of functions
                    'imports': int,       # Number of imports
                    'complexity': int     # Complexity metric
                },
                'imports': List[str],     # List of import statements
                'functions': List[dict],  # List of function details
                'classes': List[dict],    # List of class details
                'comments': List[dict],   # List of comments
                'todos': List[dict],      # List of TODOs
                'errors': List[dict],     # Optional analysis errors
                'full_content': str,      # Optional full file content
            }

        Note:
            - All fields are optional except 'type' and 'metrics'
            - Language-specific analyzers may add additional fields
        """
        pass
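
# A minimal concrete analyzer, included only to illustrate the contract
# documented above; it is not one of the shipped analyzers. It returns the
# two required fields ('type' and 'metrics') from a plain line count.
class _ExampleTextAnalyzer(BaseAnalyzer):
    """Illustrative BaseAnalyzer subclass: physical line counts only."""

    def analyze_file(self, file_path: Path) -> dict:
        content = file_path.read_text(encoding='utf-8', errors='replace')
        return {
            'type': 'text',
            'metrics': {
                'loc': len(content.splitlines()),
                'classes': 0,
                'functions': 0,
                'imports': 0,
                'complexity': 0,
            },
        }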

class ProjectAnalyzer:
    """Main project analyzer that coordinates language-specific analyzers."""

    def __init__(self):
        self.analyzers = self._initialize_analyzers()

    def _initialize_analyzers(self) -> Dict[str, BaseAnalyzer]:
        """Initialize language-specific analyzers."""
        from .python import PythonAnalyzer
        from .javascript import JavaScriptAnalyzer
        from .sql import SQLServerAnalyzer

        return {
            '.py': PythonAnalyzer(),
            '.js': JavaScriptAnalyzer(),
            '.jsx': JavaScriptAnalyzer(),
            '.ts': JavaScriptAnalyzer(),
            '.tsx': JavaScriptAnalyzer(),
            '.sql': SQLServerAnalyzer(),
        }

    def analyze(self, path: Path) -> AnalysisResult:
        """Analyze entire project directory."""
        # Initialize analysis structure
        analysis = {
            'summary': {
                'project_stats': {
                    'total_files': 0,
                    'by_type': {},
                    'lines_of_code': 0,
                    'avg_file_size': 0
                },
                'code_metrics': {
                    'functions': {'count': 0, 'with_docs': 0, 'complex': 0},
                    'classes': {'count': 0, 'with_docs': 0},
                    'imports': {'count': 0, 'unique': set()}
                },
                'maintenance': {
                    'todos': [],
                    'comments_ratio': 0,
                    'doc_coverage': 0
                },
                'structure': {
                    'directories': set(),
                    'entry_points': [],
                    'core_files': []
                }
            },
            'insights': [],
            'files': {}
        }

        # Collect analyzable files
        files = self._collect_files(path)
        analysis['summary']['project_stats']['total_files'] = len(files)

        # Process each file
        for file_path in files:
            if analyzer := self.analyzers.get(file_path.suffix.lower()):
                try:
                    file_analysis = analyzer.analyze_file(file_path)
                    str_path = str(file_path)

                    # Ensure file_analysis has required fields
                    if not isinstance(file_analysis, dict):
                        print(f"Error analyzing {file_path}: Invalid analysis result")
                        continue

                    if 'type' not in file_analysis:
                        file_analysis['type'] = file_path.suffix.lower().lstrip('.')

                    # Skip files with errors unless they have partial results
                    if 'errors' in file_analysis and not file_analysis.get('metrics', {}).get('loc', 0):
                        print(f"Error analyzing {file_path}: {file_analysis['errors']}")
                        continue

                    # Update file type counts
                    ext = file_path.suffix
                    analysis['summary']['project_stats']['by_type'][ext] = \
                        analysis['summary']['project_stats']['by_type'].get(ext, 0) + 1

                    # Store file analysis
                    analysis['files'][str_path] = file_analysis

                    # Update metrics
                    self._update_metrics(analysis, file_analysis, str_path)

                except Exception as e:
                    print(f"Error analyzing {file_path}: {e}")
                    continue

        # Calculate final metrics
        self._calculate_final_metrics(analysis)

        # Generate insights
        if insights_gen := analysis.get('summary', {}).get('insights_generator'):
            analysis['insights'] = insights_gen(analysis)
        else:
            analysis['insights'] = self._generate_default_insights(analysis)

        return AnalysisResult(**analysis)
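
    # Typical call sequence (illustrative; '.' stands in for any project root):
    #
    #   result = ProjectAnalyzer().analyze(Path('.'))
    #   result.summary['project_stats']['total_files']   # int
    #   result.to_json()   # safe to serialize: _calculate_final_metrics has
    #                      # already converted the internal sets to lists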

    def _collect_files(self, path: Path) -> List[Path]:
        """Collect all analyzable files from a directory tree."""
        files = []

        for file_path in path.rglob('*'):
            if (file_path.is_file() and
                    file_path.suffix.lower() in self.analyzers):
                files.append(file_path)

        return files

    def _update_metrics(self, analysis: dict, file_analysis: dict, file_path: str) -> None:
        """Update project metrics with file analysis results."""
        metrics = file_analysis.get('metrics', {})

        # Update basic metrics
        analysis['summary']['project_stats']['lines_of_code'] += metrics.get('loc', 0)

        # Update function metrics
        functions = file_analysis.get('functions', [])
        analysis['summary']['code_metrics']['functions']['count'] += len(functions)
        analysis['summary']['code_metrics']['functions']['with_docs'] += \
            sum(1 for f in functions if f.get('docstring'))
        analysis['summary']['code_metrics']['functions']['complex'] += \
            sum(1 for f in functions if f.get('complexity', 0) > 5)

        # Update class metrics
        classes = file_analysis.get('classes', [])
        analysis['summary']['code_metrics']['classes']['count'] += len(classes)
        analysis['summary']['code_metrics']['classes']['with_docs'] += \
            sum(1 for c in classes if c.get('docstring'))

        # Update imports
        imports = file_analysis.get('imports', [])
        analysis['summary']['code_metrics']['imports']['count'] += len(imports)
        analysis['summary']['code_metrics']['imports']['unique'].update(imports)

        # Update structure info
        dir_path = str(Path(file_path).parent)
        analysis['summary']['structure']['directories'].add(dir_path)

        # Update entry points
        if self._is_entry_point(file_path, file_analysis):
            analysis['summary']['structure']['entry_points'].append(file_path)

        # Update core files
        if self._is_core_file(file_analysis):
            analysis['summary']['structure']['core_files'].append(file_path)

        # Update maintenance info
        for todo in file_analysis.get('todos', []):
            analysis['summary']['maintenance']['todos'].append({
                'file': file_path,
                'line': todo.get('line', 0),
                'text': todo.get('text', ''),
                'priority': self._estimate_todo_priority(todo.get('text', ''))
            })

    def _calculate_final_metrics(self, analysis: dict) -> None:
        """Calculate final metrics and handle serialization."""
        total_files = analysis['summary']['project_stats']['total_files']
        if total_files > 0:
            # Calculate average file size
            analysis['summary']['project_stats']['avg_file_size'] = \
                analysis['summary']['project_stats']['lines_of_code'] / total_files

        # Calculate documentation coverage
        total_elements = (
            analysis['summary']['code_metrics']['functions']['count'] +
            analysis['summary']['code_metrics']['classes']['count']
        )
        if total_elements > 0:
            documented = (
                analysis['summary']['code_metrics']['functions']['with_docs'] +
                analysis['summary']['code_metrics']['classes']['with_docs']
            )
            analysis['summary']['maintenance']['doc_coverage'] = \
                (documented / total_elements) * 100

        # Convert sets to lists for serialization
        analysis['summary']['code_metrics']['imports']['unique'] = \
            list(analysis['summary']['code_metrics']['imports']['unique'])
        analysis['summary']['structure']['directories'] = \
            list(analysis['summary']['structure']['directories'])
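
    # Worked example of the coverage formula above (numbers are made up):
    # 9 documented functions plus 3 documented classes out of 20 total
    # elements gives (12 / 20) * 100 == 60.0 in maintenance['doc_coverage'].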

    def _is_entry_point(self, file_path: str, analysis: dict) -> bool:
        """Identify if a file is a potential entry point."""
        filename = Path(file_path).name
        if filename in ['main.py', 'app.py', 'cli.py', 'server.py', 'index.js', 'server.js']:
            return True

        # Check for main-like functions
        for func in analysis.get('functions', []):
            if func.get('name') in ['main', 'run', 'start']:
                return True

        return False

    def _is_core_file(self, analysis: dict) -> bool:
        """Identify if a file is likely a core component."""
        if len(analysis.get('functions', [])) > 5:
            return True
        if len(analysis.get('classes', [])) > 2:
            return True
        if analysis.get('metrics', {}).get('complexity', 0) > 20:
            return True
        return False

    def _estimate_todo_priority(self, text: str) -> str:
        """Estimate TODO priority based on content."""
        text = text.lower()
        if any(word in text for word in ['urgent', 'critical', 'fixme', 'bug']):
            return 'high'
        if any(word in text for word in ['important', 'needed', 'should']):
            return 'medium'
        return 'low'
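
    # Sample inputs (invented) and the priorities the keyword heuristic assigns:
    #   'FIXME: crash on empty input'  -> 'high'    (matches 'fixme')
    #   'should cache this lookup'     -> 'medium'  (matches 'should')
    #   'rename helper for clarity'    -> 'low'     (no keyword match)
    # Matching is case-insensitive because the text is lowercased first.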

    def _generate_default_insights(self, analysis: dict) -> List[str]:
        """Generate default insights from analysis results."""
        insights = []

        # Basic project stats
        total_files = analysis['summary']['project_stats']['total_files']
        insights.append(f"Project contains {total_files} analyzable files")

        # Documentation insights
        doc_coverage = analysis['summary']['maintenance']['doc_coverage']
        if doc_coverage < 50:
            insights.append(f"Low documentation coverage ({doc_coverage:.1f}%)")
        elif doc_coverage > 80:
            insights.append(f"Good documentation coverage ({doc_coverage:.1f}%)")

        # Complexity insights
        complex_funcs = analysis['summary']['code_metrics']['functions']['complex']
        if complex_funcs > 0:
            insights.append(f"Found {complex_funcs} complex functions that might need attention")

        # TODO insights
        todos = analysis['summary']['maintenance']['todos']
        if todos:
            high_priority = sum(1 for todo in todos if todo['priority'] == 'high')
            if high_priority > 0:
                insights.append(f"Found {high_priority} high-priority TODOs")

        return insights