Coverage for src\llm_code_lens\cli.py: 83%
281 statements
coverage.py v7.6.1, created at 2025-01-12 10:58 +0200
#!/usr/bin/env python3
"""
LLM Code Lens - CLI Module
Handles command-line interface and coordination of analysis components.
"""

import click
from pathlib import Path
from typing import Dict, List, Union, Optional
from rich.console import Console
from .analyzer.base import ProjectAnalyzer, AnalysisResult
from .analyzer.sql import SQLServerAnalyzer
import tiktoken
import traceback
import os
import json
import shutil

console = Console()

def parse_ignore_file(ignore_file: Path) -> List[str]:
    """Parse .llmclignore file and return list of patterns."""
    if not ignore_file.exists():
        return []

    patterns = []
    try:
        with ignore_file.open() as f:
            for line in f:
                line = line.strip()
                # Skip empty lines and comments
                if line and not line.startswith('#'):
                    patterns.append(line)
    except Exception as e:
        print(f"Warning: Error reading {ignore_file}: {e}")

    return patterns
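
# Example .llmclignore (illustrative): one pattern per line, '#' starts a comment,
# and blank lines are skipped. Patterns are plain substrings matched against the
# path string (see should_ignore below), not globs or regexes.
#
#   # local artifacts
#   secrets
#   legacy_
#   .min.js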

def should_ignore(path: Path, ignore_patterns: Optional[List[str]] = None) -> bool:
    """Determine if a file or directory should be ignored based on patterns."""
    if ignore_patterns is None:
        ignore_patterns = []

    path_str = str(path)
    default_ignores = {
        '.git', '__pycache__', '.pytest_cache', '.idea', '.vscode',
        'node_modules', 'venv', 'env', 'dist', 'build', '.tox', 'htmlcov'
    }

    # Check default ignores
    for pattern in default_ignores:
        if pattern in path_str:
            return True

    # Check custom ignore patterns
    for pattern in ignore_patterns:
        if pattern in path_str:
            return True

    return False
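
# Behaviour sketch (illustrative paths): matching is a simple substring test against
# str(path), so both default and custom patterns match anywhere in the path.
#   should_ignore(Path("src/__pycache__/x.pyc"))               # True  (default ignore)
#   should_ignore(Path("app/secrets/key.txt"), ["secrets"])    # True  (custom pattern)
#   should_ignore(Path("src/main.py"), ["secrets"])            # False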

def is_binary(file_path: Path) -> bool:
    """Check if a file is binary."""
    try:
        with file_path.open('rb') as f:
            for block in iter(lambda: f.read(1024), b''):
                if b'\0' in block:
                    return True
    except Exception:
        return True
    return False

def split_content_by_tokens(content: str, chunk_size: int = 100000) -> List[str]:
    """
    Split content into chunks based on token count.
    Handles large content safely by pre-chunking before tokenization.

    Args:
        content (str): The content to split
        chunk_size (int): Target size for each chunk in tokens

    Returns:
        List[str]: List of content chunks
    """
    if not content:
        return ['']

    try:
        # First do a rough pre-chunking by characters to avoid stack overflow
        MAX_CHUNK_CHARS = 100000  # Adjust this based on your needs
        rough_chunks = []

        for i in range(0, len(content), MAX_CHUNK_CHARS):
            rough_chunks.append(content[i:i + MAX_CHUNK_CHARS])

        encoder = tiktoken.get_encoding("cl100k_base")
        final_chunks = []

        # Process each rough chunk
        for rough_chunk in rough_chunks:
            tokens = encoder.encode(rough_chunk)

            # Split into smaller chunks based on token count
            for i in range(0, len(tokens), chunk_size):
                chunk_tokens = tokens[i:i + chunk_size]
                chunk_content = encoder.decode(chunk_tokens)
                final_chunks.append(chunk_content)

        return final_chunks

    except Exception:
        # Fallback to line-based splitting
        return _split_by_lines(content, max_chunk_size=chunk_size)

def _split_by_lines(content: str, max_chunk_size: int = 100000) -> List[str]:
    """Split content by lines with a maximum chunk size."""
    lines = content.splitlines(keepends=True)  # Keep line endings
    chunks = []
    current_chunk = []
    current_size = 0

    for line in lines:
        line_size = len(line.encode('utf-8'))
        if current_size + line_size > max_chunk_size and current_chunk:
            chunks.append(''.join(current_chunk))
            current_chunk = [line]
            current_size = line_size
        else:
            current_chunk.append(line)
            current_size += line_size

    if current_chunk:
        chunks.append(''.join(current_chunk))

    # Handle special case where we got no chunks
    if not chunks and content:
        return [content]  # Return entire content as one chunk

    return chunks
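
# Usage sketch (illustrative, not executed at import time): each chunk returned by
# split_content_by_tokens is decoded from at most `chunk_size` cl100k_base tokens;
# if tiktoken fails for any reason, the byte-budgeted line splitter above is used.
# The output file names here are hypothetical.
#   chunks = split_content_by_tokens(big_text, chunk_size=100_000)
#   for i, chunk in enumerate(chunks, 1):
#       (output_dir / f"part_{i}.txt").write_text(chunk, encoding="utf-8")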

def delete_and_create_output_dir(output_dir: Path) -> None:
    """Delete the output directory if it exists and recreate it."""
    if output_dir.exists() and output_dir.is_dir():
        shutil.rmtree(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

def export_full_content(path: Path, output_dir: Path, ignore_patterns: List[str]) -> None:
    """Export full content of all files in separate token-limited files."""
    file_content = []

    # Export file system content
    for file_path in path.rglob('*'):
        if file_path.is_file() and not should_ignore(file_path, ignore_patterns) and not is_binary(file_path):
            try:
                content = file_path.read_text(encoding='utf-8')
                file_content.append(f"\nFILE: {file_path}\n{'='*80}\n{content}\n")
            except Exception as e:
                console.print(f"[yellow]Warning: Error reading {file_path}: {str(e)}[/]")
                continue

    # Combine all content
    full_content = "\n".join(file_content)

    # Split and write content
    chunks = split_content_by_tokens(full_content, chunk_size=100000)
    for i, chunk in enumerate(chunks, 1):
        output_file = output_dir / f'full_{i}.txt'
        try:
            output_file.write_text(chunk, encoding='utf-8')
            console.print(f"[green]Created full content file: {output_file}[/]")
        except Exception as e:
            console.print(f"[yellow]Warning: Error writing {output_file}: {str(e)}[/]")

def export_sql_content(sql_results: dict, output_dir: Path) -> None:
    """Export full content of SQL objects in separate token-limited files."""
    file_content = []

    # Process stored procedures
    for proc in sql_results.get('stored_procedures', []):
        content = f"""
STORED PROCEDURE: [{proc['schema']}].[{proc['name']}]
{'='*80}
{proc['definition']}
"""
        file_content.append(content)

    # Process views
    for view in sql_results.get('views', []):
        content = f"""
VIEW: [{view['schema']}].[{view['name']}]
{'='*80}
{view['definition']}
"""
        file_content.append(content)

    # Process functions
    for func in sql_results.get('functions', []):
        content = f"""
FUNCTION: [{func['schema']}].[{func['name']}]
{'='*80}
{func['definition']}
"""
        file_content.append(content)

    # Split and write content
    if file_content:
        full_content = "\n".join(file_content)
        chunks = split_content_by_tokens(full_content, chunk_size=100000)

        for i, chunk in enumerate(chunks, 1):
            output_file = output_dir / f'sql_full_{i}.txt'
            try:
                output_file.write_text(chunk, encoding='utf-8')
                console.print(f"[green]Created SQL content file: {output_file}[/]")
            except Exception as e:
                console.print(f"[yellow]Warning: Error writing {output_file}: {str(e)}[/]")
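
# Expected input shape (inferred from the fields read above; the example names are
# hypothetical and the analyzer may include additional keys):
#   sql_results = {
#       'stored_procedures': [{'schema': 'dbo', 'name': 'usp_GetOrders', 'definition': 'CREATE PROCEDURE ...'}],
#       'views':             [{'schema': 'dbo', 'name': 'vw_Sales',      'definition': 'CREATE VIEW ...'}],
#       'functions':         [{'schema': 'dbo', 'name': 'fn_Total',      'definition': 'CREATE FUNCTION ...'}],
#   }
# Output: one or more token-limited files named sql_full_1.txt, sql_full_2.txt, ... in output_dir.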

def _combine_fs_results(combined: dict, result: Union[dict, AnalysisResult]) -> None:
    """Combine file system analysis results."""
    if isinstance(result, AnalysisResult):
        result_dict = result.dict()  # Convert AnalysisResult to dict
    else:
        result_dict = result  # Already a dict

    # Update project stats
    stats = result_dict.get('summary', {}).get('project_stats', {})
    combined['summary']['project_stats']['total_files'] += stats.get('total_files', 0)
    combined['summary']['project_stats']['lines_of_code'] += stats.get('lines_of_code', 0)

    # Update code metrics
    metrics = result_dict.get('summary', {}).get('code_metrics', {})
    for metric_type in ['functions', 'classes']:
        if metric_type in metrics:
            for key in ['count', 'with_docs', 'complex']:
                if key in metrics[metric_type]:
                    combined['summary']['code_metrics'][metric_type][key] += metrics[metric_type][key]

    # Update imports
    if 'imports' in metrics:
        combined['summary']['code_metrics']['imports']['count'] += metrics['imports'].get('count', 0)
        unique_imports = metrics['imports'].get('unique', set())
        if isinstance(unique_imports, (set, list)):
            combined['summary']['code_metrics']['imports']['unique'].update(unique_imports)

    # Update maintenance info
    maintenance = result_dict.get('summary', {}).get('maintenance', {})
    combined['summary']['maintenance']['todos'].extend(maintenance.get('todos', []))

    # Update structure info
    structure = result_dict.get('summary', {}).get('structure', {})
    if 'directories' in structure:
        dirs = structure['directories']
        if isinstance(dirs, (set, list)):
            combined['summary']['structure']['directories'].update(dirs)

    # Update insights and files
    if 'insights' in result_dict:
        combined['insights'].extend(result_dict['insights'])
    if 'files' in result_dict:
        combined['files'].update(result_dict['files'])

def _combine_results(results: List[Union[dict, AnalysisResult]]) -> AnalysisResult:
    """Combine multiple analysis results into a single result."""
    combined = {
        'summary': {
            'project_stats': {
                'total_files': 0,
                'total_sql_objects': 0,
                'by_type': {},
                'lines_of_code': 0,
                'avg_file_size': 0
            },
            'code_metrics': {
                'functions': {'count': 0, 'with_docs': 0, 'complex': 0},
                'classes': {'count': 0, 'with_docs': 0},
                'sql_objects': {'procedures': 0, 'views': 0, 'functions': 0},
                'imports': {'count': 0, 'unique': set()}
            },
            'maintenance': {
                'todos': [],
                'comments_ratio': 0,
                'doc_coverage': 0
            },
            'structure': {
                'directories': set(),
                'entry_points': [],
                'core_files': [],
                'sql_dependencies': []
            }
        },
        'insights': [],
        'files': {}
    }

    for result in results:
        if isinstance(result, dict) and ('stored_procedures' in result or 'views' in result):
            _combine_sql_results(combined, result)
        else:
            # If result is AnalysisResult, convert to dict using to_json and json.loads
            if isinstance(result, AnalysisResult):
                result_dict = json.loads(result.to_json())
            else:
                result_dict = result
            _combine_fs_results(combined, result_dict)

    # Calculate final metrics
    total_items = (combined['summary']['project_stats']['total_files'] +
                   combined['summary']['project_stats']['total_sql_objects'])

    if total_items > 0:
        combined['summary']['project_stats']['avg_file_size'] = (
            combined['summary']['project_stats']['lines_of_code'] / total_items
        )

    # Convert sets to lists for JSON serialization
    combined['summary']['code_metrics']['imports']['unique'] = list(
        combined['summary']['code_metrics']['imports']['unique']
    )
    combined['summary']['structure']['directories'] = list(
        combined['summary']['structure']['directories']
    )

    return AnalysisResult(**combined)

def _combine_sql_results(combined: dict, sql_result: dict) -> None:
    """Combine SQL results with proper object counting."""
    # Count objects
    proc_count = len(sql_result.get('stored_procedures', []))
    view_count = len(sql_result.get('views', []))
    func_count = len(sql_result.get('functions', []))

    # Update stats
    combined['summary']['project_stats']['total_sql_objects'] += proc_count + view_count + func_count
    combined['summary']['code_metrics']['sql_objects']['procedures'] += proc_count
    combined['summary']['code_metrics']['sql_objects']['views'] += view_count
    combined['summary']['code_metrics']['sql_objects']['functions'] += func_count

    # Add objects to files
    for proc in sql_result.get('stored_procedures', []):
        key = f"stored_proc_{proc['name']}"
        combined['files'][key] = proc
    for view in sql_result.get('views', []):
        key = f"view_{view['name']}"
        combined['files'][key] = view

@click.command()
@click.argument('path', type=click.Path(exists=True), default='.')
@click.option('--output', '-o', help='Output directory', default='.codelens')
@click.option('--format', '-f', type=click.Choice(['txt', 'json']), default='txt')
@click.option('--full', is_flag=True, help='Export full file/object contents in separate files')
@click.option('--debug', is_flag=True, help='Enable debug output')
@click.option('--sql-server', help='SQL Server connection string')
@click.option('--sql-database', help='SQL Database to analyze')
@click.option('--sql-config', help='Path to SQL configuration file')
@click.option('--exclude', '-e', multiple=True, help='Patterns to exclude (can be used multiple times)')
def main(path: str, output: str, format: str, full: bool, debug: bool,
         sql_server: str, sql_database: str, sql_config: str, exclude: tuple):
    """Analyze a codebase (and optionally SQL Server databases) and write the results to the output directory."""
    try:
        # Convert to absolute paths
        path = Path(path).resolve()
        output_path = Path(output).resolve()

        # Ensure output directory exists
        try:
            delete_and_create_output_dir(output_path)
        except Exception as e:
            console.print(f"[red]Error creating output directory: {str(e)}[/]")
            return 1

        if debug:
            console.print(f"[blue]Output directory: {output_path}[/]")

        results = []

        # Load SQL configuration if provided
        if sql_config:
            try:
                with open(sql_config) as f:
                    sql_settings = json.load(f)
                sql_server = sql_settings.get('server')
                sql_database = sql_settings.get('database')

                # Set environment variables if provided in config
                for key, value in sql_settings.get('env', {}).items():
                    os.environ[key] = value
            except Exception as e:
                console.print(f"[yellow]Warning: Error loading SQL config: {str(e)}[/]")
                if debug:
                    console.print(traceback.format_exc())

        # Run SQL analysis if requested
        if sql_server or sql_database or os.getenv('MSSQL_SERVER'):
            console.print("[bold blue]📊 Starting SQL Analysis...[/]")
            analyzer = SQLServerAnalyzer()
            try:
                analyzer.connect(sql_server)  # Will use env vars if not provided
                if sql_database:
                    console.print(f"[blue]Analyzing database: {sql_database}[/]")
                    sql_result = analyzer.analyze_database(sql_database)
                    results.append(sql_result)

                    if full:
                        console.print("[blue]Exporting SQL content...[/]")
                        export_sql_content(sql_result, output_path)
                else:
                    # Get all databases the user has access to
                    databases = analyzer.list_databases()
                    for db in databases:
                        console.print(f"[blue]Analyzing database: {db}[/]")
                        sql_result = analyzer.analyze_database(db)
                        results.append(sql_result)

                        if full:
                            console.print(f"[blue]Exporting SQL content for {db}...[/]")
                            export_sql_content(sql_result, output_path)

            except Exception as e:
                console.print(f"[yellow]Warning during SQL analysis: {str(e)}[/]")
                if debug:
                    console.print(traceback.format_exc())

        # Run file system analysis
        console.print("[bold blue]📁 Starting File System Analysis...[/]")
        analyzer = ProjectAnalyzer()
        fs_results = analyzer.analyze(path)
        results.append(fs_results)

        # Combine results
        combined_results = _combine_results(results)

        if debug:
            console.print("[blue]Analysis complete, writing results...[/]")

        # Write results
        result_file = output_path / f'analysis.{format}'
        try:
            # Ensure output directory exists
            output_path.mkdir(parents=True, exist_ok=True)

            content = combined_results.to_json() if format == 'json' else combined_results.to_text()
            result_file.write_text(content, encoding='utf-8')
        except Exception as e:
            console.print(f"[red]Error writing results: {str(e)}[/]")
            return 1

        console.print(f"[bold green]✨ Analysis saved to {result_file}[/]")

        # Handle full content export
        if full:
            console.print("[bold blue]📦 Exporting full contents...[/]")
            try:
                ignore_patterns = parse_ignore_file(Path('.llmclignore')) + list(exclude)
                export_full_content(path, output_path, ignore_patterns)
                console.print("[bold green]✨ Full content export complete![/]")
            except Exception as e:
                console.print(f"[yellow]Warning during full export: {str(e)}[/]")
                if debug:
                    console.print(traceback.format_exc())

        # Friendly message prompting users to star the project
        console.print("\n[bold yellow]⭐⭐⭐⭐⭐ If you like this tool, please consider giving it a star on GitHub![/]")
        console.print("[bold blue]Visit: https://github.com/SikamikanikoBG/codelens.git[/]")

        return 0

    except Exception as e:
        console.print("[bold red]Error occurred:[/]")
        if debug:
            console.print(traceback.format_exc())
        else:
            console.print(f"[bold red]Error: {str(e)}[/]")
        return 1

if __name__ == '__main__':
    main()
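
# Example invocations (illustrative; the installed console-script name may differ,
# so the module is run directly here via python -m):
#   python -m llm_code_lens.cli . --format json --output .codelens
#   python -m llm_code_lens.cli src --full -e legacy -e generated
#   python -m llm_code_lens.cli . --sql-server "<connection string>" --sql-database SalesDB --debug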