Coverage for src\llm_code_lens\cli.py: 83%

281 statements  

coverage.py v7.6.1, created at 2025-01-12 10:58 +0200

#!/usr/bin/env python3
"""
LLM Code Lens - CLI Module
Handles command-line interface and coordination of analysis components.
"""

import click
from pathlib import Path
from typing import Dict, List, Union, Optional
from rich.console import Console
from .analyzer.base import ProjectAnalyzer, AnalysisResult
from .analyzer.sql import SQLServerAnalyzer
import tiktoken
import traceback
import os
import json
import shutil

console = Console()

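# Example invocation (a sketch; the installed console-script name may differ, but the
# module can always be run directly thanks to the __main__ guard at the bottom):
#   python -m llm_code_lens.cli . --format json --full --exclude tests --exclude .venv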

def parse_ignore_file(ignore_file: Path) -> List[str]:
    """Parse .llmclignore file and return list of patterns."""
    if not ignore_file.exists():
        return []

    patterns = []
    try:
        with ignore_file.open() as f:
            for line in f:
                line = line.strip()
                # Skip empty lines and comments
                if line and not line.startswith('#'):
                    patterns.append(line)
    except Exception as e:
        print(f"Warning: Error reading {ignore_file}: {e}")

    return patterns

def should_ignore(path: Path, ignore_patterns: Optional[List[str]] = None) -> bool:
    """Determine if a file or directory should be ignored based on patterns."""
    if ignore_patterns is None:
        ignore_patterns = []

    path_str = str(path)
    default_ignores = {
        '.git', '__pycache__', '.pytest_cache', '.idea', '.vscode',
        'node_modules', 'venv', 'env', 'dist', 'build', '.tox', 'htmlcov'
    }

    # Check default ignores
    for pattern in default_ignores:
        if pattern in path_str:
            return True

    # Check custom ignore patterns
    for pattern in ignore_patterns:
        if pattern in path_str:
            return True

    return False

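# Example .llmclignore (one pattern per line, matched as a plain substring of the
# path; '#' starts a comment; the entries shown here are purely illustrative):
#   legacy_scripts
#   generated/
#   # build artifacts
#   .min.js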

def is_binary(file_path: Path) -> bool:
    """Check if a file is binary."""
    try:
        with file_path.open('rb') as f:
            for block in iter(lambda: f.read(1024), b''):
                if b'\0' in block:
                    return True
    except Exception:
        return True
    return False

def split_content_by_tokens(content: str, chunk_size: int = 100000) -> List[str]:
    """
    Split content into chunks based on token count.
    Handles large content safely by pre-chunking before tokenization.

    Args:
        content (str): The content to split
        chunk_size (int): Target size for each chunk in tokens

    Returns:
        List[str]: List of content chunks
    """
    if not content:
        return ['']

    try:
        # First do a rough pre-chunking by characters to avoid stack overflow
        MAX_CHUNK_CHARS = 100000  # Adjust this based on your needs
        rough_chunks = []

        for i in range(0, len(content), MAX_CHUNK_CHARS):
            rough_chunks.append(content[i:i + MAX_CHUNK_CHARS])

        encoder = tiktoken.get_encoding("cl100k_base")
        final_chunks = []

        # Process each rough chunk
        for rough_chunk in rough_chunks:
            tokens = encoder.encode(rough_chunk)

            # Split into smaller chunks based on token count
            for i in range(0, len(tokens), chunk_size):
                chunk_tokens = tokens[i:i + chunk_size]
                chunk_content = encoder.decode(chunk_tokens)
                final_chunks.append(chunk_content)

        return final_chunks

    except Exception:
        # Fallback to line-based splitting
        return _split_by_lines(content, max_chunk_size=chunk_size)

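# Usage sketch (illustrative only; actual chunk boundaries depend on the
# cl100k_base tokenizer):
#   chunks = split_content_by_tokens(big_text, chunk_size=100000)
#   for i, chunk in enumerate(chunks, 1):
#       (output_dir / f'part_{i}.txt').write_text(chunk, encoding='utf-8')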

def _split_by_lines(content: str, max_chunk_size: int = 100000) -> List[str]:
    """Split content by lines with a maximum chunk size."""
    lines = content.splitlines(keepends=True)  # Keep line endings
    chunks = []
    current_chunk = []
    current_size = 0

    for line in lines:
        line_size = len(line.encode('utf-8'))
        if current_size + line_size > max_chunk_size and current_chunk:
            chunks.append(''.join(current_chunk))
            current_chunk = [line]
            current_size = line_size
        else:
            current_chunk.append(line)
            current_size += line_size

    if current_chunk:
        chunks.append(''.join(current_chunk))

    # Handle special case where we got no chunks
    if not chunks and content:
        return [content]  # Return entire content as one chunk

    return chunks

def delete_and_create_output_dir(output_dir: Path) -> None:
    """Delete the output directory if it exists and recreate it."""
    if output_dir.exists() and output_dir.is_dir():
        shutil.rmtree(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

def export_full_content(path: Path, output_dir: Path, ignore_patterns: List[str]) -> None:
    """Export full content of all files in separate token-limited files."""
    file_content = []

    # Export file system content
    for file_path in path.rglob('*'):
        if file_path.is_file() and not should_ignore(file_path, ignore_patterns) and not is_binary(file_path):
            try:
                content = file_path.read_text(encoding='utf-8')
                file_content.append(f"\nFILE: {file_path}\n{'='*80}\n{content}\n")
            except Exception as e:
                console.print(f"[yellow]Warning: Error reading {file_path}: {str(e)}[/]")
                continue

    # Combine all content
    full_content = "\n".join(file_content)

    # Split and write content
    chunks = split_content_by_tokens(full_content, chunk_size=100000)
    for i, chunk in enumerate(chunks, 1):
        output_file = output_dir / f'full_{i}.txt'
        try:
            output_file.write_text(chunk, encoding='utf-8')
            console.print(f"[green]Created full content file: {output_file}[/]")
        except Exception as e:
            console.print(f"[yellow]Warning: Error writing {output_file}: {str(e)}[/]")

def export_sql_content(sql_results: dict, output_dir: Path) -> None:
    """Export full content of SQL objects in separate token-limited files."""
    file_content = []

    # Process stored procedures
    for proc in sql_results.get('stored_procedures', []):
        content = f"""
STORED PROCEDURE: [{proc['schema']}].[{proc['name']}]
{'='*80}
{proc['definition']}
"""
        file_content.append(content)

    # Process views
    for view in sql_results.get('views', []):
        content = f"""
VIEW: [{view['schema']}].[{view['name']}]
{'='*80}
{view['definition']}
"""
        file_content.append(content)

    # Process functions
    for func in sql_results.get('functions', []):
        content = f"""
FUNCTION: [{func['schema']}].[{func['name']}]
{'='*80}
{func['definition']}
"""
        file_content.append(content)

    # Split and write content
    if file_content:
        full_content = "\n".join(file_content)
        chunks = split_content_by_tokens(full_content, chunk_size=100000)

        for i, chunk in enumerate(chunks, 1):
            output_file = output_dir / f'sql_full_{i}.txt'
            try:
                output_file.write_text(chunk, encoding='utf-8')
                console.print(f"[green]Created SQL content file: {output_file}[/]")
            except Exception as e:
                console.print(f"[yellow]Warning: Error writing {output_file}: {str(e)}[/]")

def _combine_fs_results(combined: dict, result: Union[dict, AnalysisResult]) -> None:
    """Combine file system analysis results."""
    if isinstance(result, AnalysisResult):
        result_dict = result.dict()  # Convert AnalysisResult to dict
    else:
        result_dict = result  # Already a dict

    # Update project stats
    stats = result_dict.get('summary', {}).get('project_stats', {})
    combined['summary']['project_stats']['total_files'] += stats.get('total_files', 0)
    combined['summary']['project_stats']['lines_of_code'] += stats.get('lines_of_code', 0)

    # Update code metrics
    metrics = result_dict.get('summary', {}).get('code_metrics', {})
    for metric_type in ['functions', 'classes']:
        if metric_type in metrics:
            for key in ['count', 'with_docs', 'complex']:
                if key in metrics[metric_type]:
                    combined['summary']['code_metrics'][metric_type][key] += metrics[metric_type][key]

    # Update imports
    if 'imports' in metrics:
        combined['summary']['code_metrics']['imports']['count'] += metrics['imports'].get('count', 0)
        unique_imports = metrics['imports'].get('unique', set())
        if isinstance(unique_imports, (set, list)):
            combined['summary']['code_metrics']['imports']['unique'].update(unique_imports)

    # Update maintenance info
    maintenance = result_dict.get('summary', {}).get('maintenance', {})
    combined['summary']['maintenance']['todos'].extend(maintenance.get('todos', []))

    # Update structure info
    structure = result_dict.get('summary', {}).get('structure', {})
    if 'directories' in structure:
        dirs = structure['directories']
        if isinstance(dirs, (set, list)):
            combined['summary']['structure']['directories'].update(dirs)

    # Update insights and files
    if 'insights' in result_dict:
        combined['insights'].extend(result_dict['insights'])
    if 'files' in result_dict:
        combined['files'].update(result_dict['files'])

def _combine_results(results: List[Union[dict, AnalysisResult]]) -> AnalysisResult:
    """Combine multiple analysis results into a single result."""
    combined = {
        'summary': {
            'project_stats': {
                'total_files': 0,
                'total_sql_objects': 0,
                'by_type': {},
                'lines_of_code': 0,
                'avg_file_size': 0
            },
            'code_metrics': {
                'functions': {'count': 0, 'with_docs': 0, 'complex': 0},
                'classes': {'count': 0, 'with_docs': 0},
                'sql_objects': {'procedures': 0, 'views': 0, 'functions': 0},
                'imports': {'count': 0, 'unique': set()}
            },
            'maintenance': {
                'todos': [],
                'comments_ratio': 0,
                'doc_coverage': 0
            },
            'structure': {
                'directories': set(),
                'entry_points': [],
                'core_files': [],
                'sql_dependencies': []
            }
        },
        'insights': [],
        'files': {}
    }

    for result in results:
        if isinstance(result, dict) and ('stored_procedures' in result or 'views' in result):
            _combine_sql_results(combined, result)
        else:
            # If result is an AnalysisResult, convert it to a dict via to_json/json.loads
            if isinstance(result, AnalysisResult):
                result_dict = json.loads(result.to_json())
            else:
                result_dict = result
            _combine_fs_results(combined, result_dict)

    # Calculate final metrics
    total_items = (combined['summary']['project_stats']['total_files'] +
                   combined['summary']['project_stats']['total_sql_objects'])

    if total_items > 0:
        combined['summary']['project_stats']['avg_file_size'] = (
            combined['summary']['project_stats']['lines_of_code'] / total_items
        )

    # Convert sets to lists for JSON serialization
    combined['summary']['code_metrics']['imports']['unique'] = list(
        combined['summary']['code_metrics']['imports']['unique']
    )
    combined['summary']['structure']['directories'] = list(
        combined['summary']['structure']['directories']
    )

    return AnalysisResult(**combined)

def _combine_sql_results(combined: dict, sql_result: dict) -> None:
    """Combine SQL results with proper object counting."""
    # Count objects
    proc_count = len(sql_result.get('stored_procedures', []))
    view_count = len(sql_result.get('views', []))
    func_count = len(sql_result.get('functions', []))

    # Update stats
    combined['summary']['project_stats']['total_sql_objects'] += proc_count + view_count + func_count
    combined['summary']['code_metrics']['sql_objects']['procedures'] += proc_count
    combined['summary']['code_metrics']['sql_objects']['views'] += view_count
    combined['summary']['code_metrics']['sql_objects']['functions'] += func_count

    # Add objects to files
    for proc in sql_result.get('stored_procedures', []):
        key = f"stored_proc_{proc['name']}"
        combined['files'][key] = proc
    for view in sql_result.get('views', []):
        key = f"view_{view['name']}"
        combined['files'][key] = view

@click.command()
@click.argument('path', type=click.Path(exists=True), default='.')
@click.option('--output', '-o', help='Output directory', default='.codelens')
@click.option('--format', '-f', type=click.Choice(['txt', 'json']), default='txt')
@click.option('--full', is_flag=True, help='Export full file/object contents in separate files')
@click.option('--debug', is_flag=True, help='Enable debug output')
@click.option('--sql-server', help='SQL Server connection string')
@click.option('--sql-database', help='SQL Database to analyze')
@click.option('--sql-config', help='Path to SQL configuration file')
@click.option('--exclude', '-e', multiple=True, help='Patterns to exclude (can be used multiple times)')
def main(path: str, output: str, format: str, full: bool, debug: bool,
         sql_server: str, sql_database: str, sql_config: str, exclude: tuple):
    """Analyze a project (and optionally SQL Server databases) and write results to the output directory."""
    try:
        # Convert to absolute paths
        path = Path(path).resolve()
        output_path = Path(output).resolve()

        # Recreate the output directory (removes any previous results)
        try:
            delete_and_create_output_dir(output_path)
        except Exception as e:
            console.print(f"[red]Error creating output directory: {str(e)}[/]")
            return 1

        if debug:
            console.print(f"[blue]Output directory: {output_path}[/]")

        results = []

        # Load SQL configuration if provided
        if sql_config:
            try:
                with open(sql_config) as f:
                    sql_settings = json.load(f)
                    sql_server = sql_settings.get('server')
                    sql_database = sql_settings.get('database')

                    # Set environment variables if provided in config
                    for key, value in sql_settings.get('env', {}).items():
                        os.environ[key] = value
            except Exception as e:
                console.print(f"[yellow]Warning: Error loading SQL config: {str(e)}[/]")
                if debug:
                    console.print(traceback.format_exc())

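        # Example --sql-config file (a sketch; only the 'server', 'database' and 'env'
        # keys are read above, and the environment variable names shown are hypothetical):
        #   {
        #     "server": "sql.example.local",
        #     "database": "AdventureWorks",
        #     "env": {"MSSQL_USER": "reader", "MSSQL_PASSWORD": "secret"}
        #   }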

        # Run SQL analysis if requested
        if sql_server or sql_database or os.getenv('MSSQL_SERVER'):
            console.print("[bold blue]📊 Starting SQL Analysis...[/]")
            analyzer = SQLServerAnalyzer()
            try:
                analyzer.connect(sql_server)  # Will use env vars if not provided
                if sql_database:
                    console.print(f"[blue]Analyzing database: {sql_database}[/]")
                    sql_result = analyzer.analyze_database(sql_database)
                    results.append(sql_result)

                    if full:
                        console.print("[blue]Exporting SQL content...[/]")
                        export_sql_content(sql_result, output_path)
                else:
                    # Get all databases the user has access to
                    databases = analyzer.list_databases()
                    for db in databases:
                        console.print(f"[blue]Analyzing database: {db}[/]")
                        sql_result = analyzer.analyze_database(db)
                        results.append(sql_result)

                        if full:
                            console.print(f"[blue]Exporting SQL content for {db}...[/]")
                            export_sql_content(sql_result, output_path)

            except Exception as e:
                console.print(f"[yellow]Warning during SQL analysis: {str(e)}[/]")
                if debug:
                    console.print(traceback.format_exc())

        # Run file system analysis
        console.print("[bold blue]📁 Starting File System Analysis...[/]")
        analyzer = ProjectAnalyzer()
        fs_results = analyzer.analyze(path)
        results.append(fs_results)

        # Combine results
        combined_results = _combine_results(results)

        if debug:
            console.print("[blue]Analysis complete, writing results...[/]")

        # Write results
        result_file = output_path / f'analysis.{format}'
        try:
            # Ensure output directory exists
            output_path.mkdir(parents=True, exist_ok=True)

            content = combined_results.to_json() if format == 'json' else combined_results.to_text()
            result_file.write_text(content, encoding='utf-8')
        except Exception as e:
            console.print(f"[red]Error writing results: {str(e)}[/]")
            return 1

        console.print(f"[bold green]✨ Analysis saved to {result_file}[/]")

        # Handle full content export
        if full:
            console.print("[bold blue]📦 Exporting full contents...[/]")
            try:
                ignore_patterns = parse_ignore_file(Path('.llmclignore')) + list(exclude)
                export_full_content(path, output_path, ignore_patterns)
                console.print("[bold green]✨ Full content export complete![/]")
            except Exception as e:
                console.print(f"[yellow]Warning during full export: {str(e)}[/]")
                if debug:
                    console.print(traceback.format_exc())

        # Friendly message to prompt users to give a star
        console.print("\n [bold yellow] ⭐⭐⭐⭐⭐ If you like this tool, please consider giving it a star on GitHub![/]")
        console.print("[bold blue]Visit: https://github.com/SikamikanikoBG/codelens.git[/]")

        return 0

    except Exception as e:
        console.print("[bold red]Error occurred:[/]")
        if debug:
            console.print(traceback.format_exc())
        else:
            console.print(f"[bold red]Error: {str(e)}[/]")
        return 1

if __name__ == '__main__':
    main()

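# Expected output directory layout (default .codelens/) after a --full run, assuming
# at least one SQL database was analyzed:
#   analysis.txt (or analysis.json with --format json)
#   full_1.txt, full_2.txt, ...          token-limited dumps of project files
#   sql_full_1.txt, sql_full_2.txt, ...  token-limited dumps of SQL object definitions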