Coverage for src\llm_code_lens\analyzer\base.py: 82%

134 statements  

coverage.py v7.6.1, created at 2025-01-12 10:23 +0200

from abc import ABC, abstractmethod
from pathlib import Path
from typing import Dict, List, Optional, Set, Tuple, Union
from dataclasses import dataclass


@dataclass
class AnalysisResult:
    """Container for analysis results."""
    summary: dict
    insights: List[str]
    files: Dict[str, dict]

    def to_text(self) -> str:
        """Convert analysis to LLM-friendly text format."""
        from ..formatters.llm import format_analysis
        return format_analysis(self)

    def to_json(self) -> str:
        """Convert analysis to JSON format."""
        import json
        return json.dumps({
            'summary': self.summary,
            'insights': self.insights,
            'files': self.files
        }, indent=2)
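# Usage sketch (illustrative, not part of the analyzed module); the field
# values below are hypothetical:
#
#     result = AnalysisResult(
#         summary={'project_stats': {'total_files': 1}},
#         insights=['Project contains 1 analyzable files'],
#         files={'src/app.py': {'type': 'python', 'metrics': {'loc': 120}}},
#     )
#     print(result.to_json())  # pretty-printed JSON with summary/insights/files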

class BaseAnalyzer(ABC):
    """Base class for all code analyzers."""

    @abstractmethod
    def analyze_file(self, file_path: Path) -> dict:
        """
        Analyze a file and return standardized analysis results.

        Args:
            file_path: Path to the file to analyze.

        Returns:
            dict with the following structure:
            {
                'type': str,              # Analyzer type (e.g., 'python', 'sql')
                'metrics': {
                    'loc': int,           # Lines of code
                    'classes': int,       # Number of classes
                    'functions': int,     # Number of functions
                    'imports': int,       # Number of imports
                    'complexity': int     # Complexity metric
                },
                'imports': List[str],     # List of import statements
                'functions': List[dict],  # List of function details
                'classes': List[dict],    # List of class details
                'comments': List[dict],   # List of comments
                'todos': List[dict],      # List of TODOs
                'errors': List[dict],     # Optional analysis errors
                'full_content': str,      # Optional full file content
            }

        Note:
            - All fields are optional except 'type' and 'metrics'
            - Language-specific analyzers may add additional fields
        """
        pass
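# A minimal dict conforming to the analyze_file() contract above (hypothetical
# figures; only 'type' and 'metrics' are required):
#
#     {
#         'type': 'python',
#         'metrics': {'loc': 42, 'classes': 1, 'functions': 3,
#                     'imports': 2, 'complexity': 4},
#         'functions': [{'name': 'main', 'docstring': 'Entry point.',
#                        'complexity': 1}],
#         'todos': [{'line': 10, 'text': 'FIXME: handle empty input'}],
#     }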

class ProjectAnalyzer:
    """Main project analyzer that coordinates language-specific analyzers."""

    def __init__(self):
        self.analyzers = self._initialize_analyzers()

    def _initialize_analyzers(self) -> Dict[str, BaseAnalyzer]:
        """Initialize language-specific analyzers."""
        from .python import PythonAnalyzer
        from .javascript import JavaScriptAnalyzer
        from .sql import SQLServerAnalyzer

        return {
            '.py': PythonAnalyzer(),
            '.js': JavaScriptAnalyzer(),
            '.jsx': JavaScriptAnalyzer(),
            '.ts': JavaScriptAnalyzer(),
            '.tsx': JavaScriptAnalyzer(),
            '.sql': SQLServerAnalyzer(),
        }
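    # Dispatch in analyze() is keyed purely on file extension, so supporting
    # another extension is a one-line change. A sketch ('.mjs' is an assumed
    # addition, reusing the module's existing JavaScript analyzer):
    #
    #     from llm_code_lens.analyzer.javascript import JavaScriptAnalyzer
    #     analyzer = ProjectAnalyzer()
    #     analyzer.analyzers['.mjs'] = JavaScriptAnalyzer()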

    def analyze(self, path: Path) -> AnalysisResult:
        """Analyze entire project directory."""
        # Initialize analysis structure
        analysis = {
            'summary': {
                'project_stats': {
                    'total_files': 0,
                    'by_type': {},
                    'lines_of_code': 0,
                    'avg_file_size': 0
                },
                'code_metrics': {
                    'functions': {'count': 0, 'with_docs': 0, 'complex': 0},
                    'classes': {'count': 0, 'with_docs': 0},
                    'imports': {'count': 0, 'unique': set()}
                },
                'maintenance': {
                    'todos': [],
                    'comments_ratio': 0,
                    'doc_coverage': 0
                },
                'structure': {
                    'directories': set(),
                    'entry_points': [],
                    'core_files': []
                }
            },
            'insights': [],
            'files': {}
        }

        # Collect analyzable files
        files = self._collect_files(path)
        analysis['summary']['project_stats']['total_files'] = len(files)

        # Process each file
        for file_path in files:
            if analyzer := self.analyzers.get(file_path.suffix.lower()):
                try:
                    file_analysis = analyzer.analyze_file(file_path)
                    str_path = str(file_path)

                    # Ensure file_analysis has required fields
                    if not isinstance(file_analysis, dict):
                        print(f"Error analyzing {file_path}: Invalid analysis result")
                        continue

                    if 'type' not in file_analysis:
                        file_analysis['type'] = file_path.suffix.lower().lstrip('.')

                    # Skip files with errors unless they have partial results
                    if 'errors' in file_analysis and not file_analysis.get('metrics', {}).get('loc', 0):
                        print(f"Error analyzing {file_path}: {file_analysis['errors']}")
                        continue

                    # Update file types count
                    ext = file_path.suffix
                    analysis['summary']['project_stats']['by_type'][ext] = \
                        analysis['summary']['project_stats']['by_type'].get(ext, 0) + 1

                    # Store file analysis
                    analysis['files'][str_path] = file_analysis

                    # Update metrics
                    self._update_metrics(analysis, file_analysis, str_path)

                except Exception as e:
                    print(f"Error analyzing {file_path}: {e}")
                    continue

        # Calculate final metrics
        self._calculate_final_metrics(analysis)

        # Generate insights
        if insights_gen := analysis.get('summary', {}).get('insights_generator'):
            analysis['insights'] = insights_gen(analysis)
        else:
            analysis['insights'] = self._generate_default_insights(analysis)

        return AnalysisResult(**analysis)
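    # Note: analyze() builds the summary dict locally and never sets an
    # 'insights_generator' key itself, so the custom-generator branch above
    # only fires if a hook is injected earlier in the pipeline. A hypothetical
    # subclass sketch (the key then remains in the serialized summary, which a
    # real hook would want to clean up):
    #
    #     class CustomProjectAnalyzer(ProjectAnalyzer):
    #         def _calculate_final_metrics(self, analysis: dict) -> None:
    #             super()._calculate_final_metrics(analysis)
    #             analysis['summary']['insights_generator'] = (
    #                 lambda a: ['custom insight']
    #             )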

    def _collect_files(self, path: Path) -> List[Path]:
        """Collect all analyzable files from directory."""
        files = []

        for file_path in path.rglob('*'):
            if (file_path.is_file() and
                    file_path.suffix.lower() in self.analyzers):
                files.append(file_path)

        return files
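    # Caveat: path.rglob('*') descends into every subdirectory, including
    # vendored or generated trees if present. A possible filter (the directory
    # names here are assumptions, not part of the original code):
    #
    #     SKIP_DIRS = {'.git', '.venv', 'node_modules', '__pycache__'}
    #     if file_path.is_file() and not SKIP_DIRS.intersection(file_path.parts):
    #         files.append(file_path)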

    def _update_metrics(self, analysis: dict, file_analysis: dict, file_path: str) -> None:
        """Update project metrics with file analysis results."""
        metrics = file_analysis.get('metrics', {})

        # Update basic metrics
        analysis['summary']['project_stats']['lines_of_code'] += metrics.get('loc', 0)

        # Update function metrics
        functions = file_analysis.get('functions', [])
        analysis['summary']['code_metrics']['functions']['count'] += len(functions)
        analysis['summary']['code_metrics']['functions']['with_docs'] += \
            sum(1 for f in functions if f.get('docstring'))
        analysis['summary']['code_metrics']['functions']['complex'] += \
            sum(1 for f in functions if f.get('complexity', 0) > 5)

        # Update class metrics
        classes = file_analysis.get('classes', [])
        analysis['summary']['code_metrics']['classes']['count'] += len(classes)
        analysis['summary']['code_metrics']['classes']['with_docs'] += \
            sum(1 for c in classes if c.get('docstring'))

        # Update imports
        imports = file_analysis.get('imports', [])
        analysis['summary']['code_metrics']['imports']['count'] += len(imports)
        analysis['summary']['code_metrics']['imports']['unique'].update(imports)

        # Update structure info
        dir_path = str(Path(file_path).parent)
        analysis['summary']['structure']['directories'].add(dir_path)

        # Update entry points
        if self._is_entry_point(file_path, file_analysis):
            analysis['summary']['structure']['entry_points'].append(file_path)

        # Update core files
        if self._is_core_file(file_analysis):
            analysis['summary']['structure']['core_files'].append(file_path)

        # Update maintenance info
        for todo in file_analysis.get('todos', []):
            analysis['summary']['maintenance']['todos'].append({
                'file': file_path,
                'line': todo.get('line', 0),
                'text': todo.get('text', ''),
                'priority': self._estimate_todo_priority(todo.get('text', ''))
            })

    def _calculate_final_metrics(self, analysis: dict) -> None:
        """Calculate final metrics and handle serialization."""
        total_files = analysis['summary']['project_stats']['total_files']
        if total_files > 0:
            # Calculate average file size
            analysis['summary']['project_stats']['avg_file_size'] = \
                analysis['summary']['project_stats']['lines_of_code'] / total_files

        # Calculate documentation coverage
        total_elements = (
            analysis['summary']['code_metrics']['functions']['count'] +
            analysis['summary']['code_metrics']['classes']['count']
        )
        if total_elements > 0:
            documented = (
                analysis['summary']['code_metrics']['functions']['with_docs'] +
                analysis['summary']['code_metrics']['classes']['with_docs']
            )
            analysis['summary']['maintenance']['doc_coverage'] = \
                (documented / total_elements) * 100

        # Convert sets to lists for serialization
        analysis['summary']['code_metrics']['imports']['unique'] = \
            list(analysis['summary']['code_metrics']['imports']['unique'])
        analysis['summary']['structure']['directories'] = \
            list(analysis['summary']['structure']['directories'])
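    # Worked example with hypothetical counts: 10 documented out of 12
    # functions plus 2 documented out of 4 classes gives
    # doc_coverage = ((10 + 2) / (12 + 4)) * 100 = 75.0.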

    def _is_entry_point(self, file_path: str, analysis: dict) -> bool:
        """Identify if a file is a potential entry point."""
        filename = Path(file_path).name
        if filename in ['main.py', 'app.py', 'cli.py', 'server.py', 'index.js', 'server.js']:
            return True

        # Check for main-like functions; use .get() since the analyze_file
        # contract treats all fields other than 'type' and 'metrics' as optional
        for func in analysis.get('functions', []):
            if func.get('name') in ['main', 'run', 'start']:
                return True

        return False

    def _is_core_file(self, analysis: dict) -> bool:
        """Identify if a file is likely a core component."""
        if len(analysis.get('functions', [])) > 5:
            return True
        if len(analysis.get('classes', [])) > 2:
            return True
        if analysis.get('metrics', {}).get('complexity', 0) > 20:
            return True
        return False

    def _estimate_todo_priority(self, text: str) -> str:
        """Estimate TODO priority based on content."""
        text = text.lower()
        if any(word in text for word in ['urgent', 'critical', 'fixme', 'bug']):
            return 'high'
        if any(word in text for word in ['important', 'needed', 'should']):
            return 'medium'
        return 'low'
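    # The match is a case-insensitive substring search (so e.g. 'debug' also
    # contains 'bug'). Illustrative inputs:
    #
    #     _estimate_todo_priority('FIXME: crashes on empty file')  # -> 'high'
    #     _estimate_todo_priority('should cache this lookup')      # -> 'medium'
    #     _estimate_todo_priority('tidy up naming')                # -> 'low'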

    def _generate_default_insights(self, analysis: dict) -> List[str]:
        """Generate default insights from analysis results."""
        insights = []

        # Basic project stats
        total_files = analysis['summary']['project_stats']['total_files']
        insights.append(f"Project contains {total_files} analyzable files")

        # Documentation insights
        doc_coverage = analysis['summary']['maintenance']['doc_coverage']
        if doc_coverage < 50:
            insights.append(f"Low documentation coverage ({doc_coverage:.1f}%)")
        elif doc_coverage > 80:
            insights.append(f"Good documentation coverage ({doc_coverage:.1f}%)")

        # Complexity insights
        complex_funcs = analysis['summary']['code_metrics']['functions']['complex']
        if complex_funcs > 0:
            insights.append(f"Found {complex_funcs} complex functions that might need attention")

        # TODO insights
        todos = analysis['summary']['maintenance']['todos']
        if todos:
            high_priority = sum(1 for todo in todos if todo['priority'] == 'high')
            if high_priority > 0:
                insights.append(f"Found {high_priority} high-priority TODOs")

        return insights