Coverage for src\llm_code_lens\processors\summary.py: 72% (125 statements)

coverage.py v7.6.1, created at 2025-01-12 10:23 +0200

# File: src/llm_code_lens/processors/summary.py

from typing import Dict, List
from pathlib import Path


def generate_summary(analysis: Dict[str, dict]) -> dict:
    """Generate a project-wide summary from per-file analysis results."""
    summary = {
        'project_stats': {
            'total_files': len(analysis),
            'by_type': {},
            'lines_of_code': 0,
            'avg_file_size': 0
        },
        'code_metrics': {
            'functions': {'count': 0, 'with_docs': 0, 'complex': 0},
            'classes': {'count': 0, 'with_docs': 0},
            'imports': {'count': 0, 'unique': set()},  # set deduplicates imports across files
            'sql_objects': {'procedures': 0, 'views': 0, 'functions': 0}
        },
        'maintenance': {
            'todos': [],
            'comments_ratio': 0,
            'doc_coverage': 0
        },
        'structure': {
            'directories': set(),
            'entry_points': [],
            'core_files': [],
            'sql_dependencies': []
        }
    }

    # Process each file
    for file_path, file_analysis in analysis.items():
        _process_file_stats(file_path, file_analysis, summary)
        _process_code_metrics(file_analysis, summary)
        _process_maintenance_info(file_path, file_analysis, summary)
        _process_structure_info(file_path, file_analysis, summary)

    # Calculate final metrics
    _calculate_final_metrics(summary)

    return summary

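# Illustrative usage sketch, not part of the original module. The per-file
# schema in `sample` is an assumption inferred from the accessors used in
# this file, not a documented contract.
def _generate_summary_example() -> dict:
    sample = {
        'src/app.py': {
            'metrics': {'loc': 100, 'complexity': 8},
            'functions': [{'name': 'main', 'docstring': 'Entry point.',
                           'complexity': 2, 'loc': 10, 'args': []}],
            'classes': [],
            'imports': ['os', 'sys'],
            'todos': [{'line': 3, 'text': 'FIXME: handle empty input'}],
            'comments': ['# setup'],
        }
    }
    summary = generate_summary(sample)
    assert summary['project_stats']['total_files'] == 1
    assert summary['project_stats']['avg_file_size'] == 100.0
    return summary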

def _process_file_stats(file_path: str, analysis: dict, summary: dict) -> None:
    """Process basic file statistics."""
    # Track file types
    ext = Path(file_path).suffix
    summary['project_stats']['by_type'][ext] = \
        summary['project_stats']['by_type'].get(ext, 0) + 1

    # Track lines of code
    metrics = analysis.get('metrics', {})
    loc = metrics.get('loc', 0)
    summary['project_stats']['lines_of_code'] += loc

def _process_code_metrics(file_analysis: dict, summary: dict) -> None:
    """Process code metrics from analysis."""
    if not isinstance(file_analysis, dict):
        return

    # Process functions
    for func in file_analysis.get('functions', []):
        if not isinstance(func, dict):
            continue

        summary['code_metrics']['functions']['count'] += 1

        if func.get('docstring'):
            summary['code_metrics']['functions']['with_docs'] += 1

        # Safely handle complexity and loc values that might be None
        complexity = func.get('complexity')
        loc = func.get('loc')

        if (complexity is not None and complexity > 5) or \
           (loc is not None and loc > 50):
            summary['code_metrics']['functions']['complex'] += 1

    # Process classes
    for cls in file_analysis.get('classes', []):
        if not isinstance(cls, dict):
            continue

        summary['code_metrics']['classes']['count'] += 1
        if cls.get('docstring'):
            summary['code_metrics']['classes']['with_docs'] += 1

    # Process imports
    imports = file_analysis.get('imports', [])
    if isinstance(imports, list):
        summary['code_metrics']['imports']['count'] += len(imports)
        summary['code_metrics']['imports']['unique'].update(
            imp for imp in imports if isinstance(imp, str)
        )

def _process_maintenance_info(file_path: str, analysis: dict, summary: dict) -> None:
    """Process maintenance-related information."""
    # Track TODOs
    for todo in analysis.get('todos', []):
        text = todo.get('text', '')
        summary['maintenance']['todos'].append({
            'file': file_path,
            'line': todo.get('line', 0),
            'text': text,
            'priority': _estimate_todo_priority(text)
        })

    # Track comments: accumulate per-file ratios, averaged in _calculate_final_metrics
    comments = len(analysis.get('comments', []))
    lines = analysis.get('metrics', {}).get('loc', 0)
    if lines > 0:
        summary['maintenance']['comments_ratio'] += comments / lines

def _process_structure_info(file_path: str, analysis: dict, summary: dict) -> None:
    """Process project structure information."""
    # Track directories
    dir_path = str(Path(file_path).parent)
    summary['structure']['directories'].add(dir_path)

    # Identify potential entry points
    if _is_potential_entry_point(file_path, analysis):
        summary['structure']['entry_points'].append(file_path)

    # Identify core files based on size and complexity heuristics
    if _is_core_file(analysis):
        summary['structure']['core_files'].append(file_path)

def _calculate_final_metrics(summary: dict) -> None:
    """Calculate final averages and percentages."""
    total_files = summary['project_stats']['total_files']
    if total_files > 0:
        # Calculate average file size
        summary['project_stats']['avg_file_size'] = \
            summary['project_stats']['lines_of_code'] / total_files

        # Per-file comment ratios were accumulated; convert the sum to a mean
        summary['maintenance']['comments_ratio'] /= total_files

    # Calculate documentation coverage
    funcs = summary['code_metrics']['functions']
    classes = summary['code_metrics']['classes']
    total_elements = funcs['count'] + classes['count']
    if total_elements > 0:
        documented = funcs['with_docs'] + classes['with_docs']
        summary['maintenance']['doc_coverage'] = \
            (documented / total_elements) * 100

    # Convert sets to lists for JSON serialization
    summary['code_metrics']['imports']['unique'] = \
        list(summary['code_metrics']['imports']['unique'])
    summary['structure']['directories'] = \
        list(summary['structure']['directories'])

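# Rationale sketch for the set -> list conversion above (illustrative helper,
# not part of the original module): json.dumps raises TypeError on sets, so
# the summary only becomes JSON-safe once _calculate_final_metrics has run.
def _summary_as_json(summary: dict) -> str:
    import json  # local import keeps this sketch self-contained
    return json.dumps(summary, indent=2)  # fails with TypeError if sets remain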

def _estimate_todo_priority(text: str) -> str:
    """Estimate TODO priority based on content.

    >>> _estimate_todo_priority('FIXME: crash on empty input')
    'high'
    >>> _estimate_todo_priority('should refactor later')
    'medium'
    >>> _estimate_todo_priority('tidy up variable names')
    'low'
    """
    text = text.lower()
    if any(word in text for word in ['urgent', 'critical', 'fixme', 'bug']):
        return 'high'
    if any(word in text for word in ['important', 'needed', 'should']):
        return 'medium'
    return 'low'

def _is_potential_entry_point(file_path: str, analysis: dict) -> bool:
    """Identify if a file is a potential entry point."""
    filename = Path(file_path).name
    if filename in {'main.py', 'app.py', 'cli.py', 'server.py', 'index.js', 'server.js'}:
        return True

    # Check for main-like functions
    for func in analysis.get('functions', []):
        if isinstance(func, dict) and func.get('name') in {'main', 'run', 'start', 'cli', 'execute'}:
            return True

    return False

def _is_core_file(analysis: dict) -> bool:
    """Identify if a file is likely a core component."""
    # Check function count
    if len(analysis.get('functions', [])) > 5:
        return True

    # Check class count
    if len(analysis.get('classes', [])) > 2:
        return True

    # Check function complexity ("or 0" guards against None values,
    # which the analyzer may emit for complexity and loc)
    complex_funcs = sum(1 for f in analysis.get('functions', [])
                        if ((f.get('complexity') or 0) > 5 or
                            (f.get('loc') or 0) > 50 or
                            len(f.get('args') or []) > 3))
    if complex_funcs >= 1:
        return True

    # Check file complexity
    if (analysis.get('metrics', {}).get('complexity') or 0) > 20:
        return True

    return False

def generate_insights(analysis: Dict[str, dict]) -> List[str]:
    """Generate human-readable insights from per-file analysis results."""
    insights = []
    total_files = len(analysis)

    # Basic project stats
    if total_files == 1:
        insights.append("Found 1 analyzable file")
    else:
        insights.append(f"Found {total_files} analyzable files")

    # Track various metrics
    total_todos = 0
    todo_priorities = {'high': 0, 'medium': 0, 'low': 0}
    undocumented_count = 0
    complex_functions = []

    for file_path, file_analysis in analysis.items():
        # Process TODOs
        for todo in file_analysis.get('todos', []):
            total_todos += 1
            text = todo.get('text', '').lower()
            if any(word in text for word in ['urgent', 'critical', 'memory leak', 'security']):
                todo_priorities['high'] += 1
            elif any(word in text for word in ['important', 'needed']):
                todo_priorities['medium'] += 1
            else:
                todo_priorities['low'] += 1

        # Process functions ("or 0" guards against None complexity/loc values)
        for func in file_analysis.get('functions', []):
            if not func.get('docstring'):
                undocumented_count += 1
            if (func.get('complexity') or 0) > 5 or (func.get('loc') or 0) > 50:
                complex_functions.append(f"{func.get('name')} in {file_path}")

    # Add insights based on findings
    if total_todos > 0:
        insights.append(f"Found {total_todos} TODOs across {total_files} files")
        if todo_priorities['high'] > 0:
            insights.append(f"Found {todo_priorities['high']} high-priority TODOs")

    if complex_functions:
        insights.append(f"Complex functions detected: {', '.join(complex_functions)}")

    if undocumented_count > 0:
        insights.append(f"Found {undocumented_count} undocumented functions")

    return insights

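# Minimal demo (illustrative, not part of the measured module). The analyzer
# output schema below is an assumption inferred from the accessors above.
if __name__ == '__main__':
    sample_analysis = {
        'src/app.py': {
            'metrics': {'loc': 120, 'complexity': 8},
            'functions': [
                {'name': 'main', 'docstring': 'Entry point.',
                 'complexity': 2, 'loc': 10, 'args': []},
                {'name': 'transform', 'docstring': None,
                 'complexity': 7, 'loc': 60, 'args': ['a', 'b', 'c', 'd']},
            ],
            'classes': [{'name': 'App', 'docstring': 'Main app.'}],
            'imports': ['os', 'sys'],
            'todos': [{'line': 3, 'text': 'FIXME: handle empty input'}],
            'comments': ['# setup'],
        }
    }
    for line in generate_insights(sample_analysis):
        print(line)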