Coverage for src\llm_code_lens\analyzer\sql.py: 75%
177 statements
import re
import os
import pyodbc
from pathlib import Path
from typing import Dict, List, Optional


class SQLServerAnalyzer:
    """SQL Server code analyzer for stored procedures and views."""

    def __init__(self):
        self.conn = None
        self.cursor = None

    def connect(self, connection_string: Optional[str] = None) -> None:
        """
        Connect to SQL Server using either a provided connection string or environment variables.

        Args:
            connection_string: Optional connection string. If not provided, environment variables are used.
        """
        try:
            if connection_string:
                self.conn = pyodbc.connect(connection_string)
            else:
                # Use environment variables
                server = os.getenv('MSSQL_SERVER')
                username = os.getenv('MSSQL_USERNAME')
                password = os.getenv('MSSQL_PASSWORD')

                if not server:
                    raise ValueError("No server specified. Provide connection string or set MSSQL_SERVER environment variable")

                # Build connection string
                conn_str = f'DRIVER={{ODBC Driver 17 for SQL Server}};SERVER={server}'
                if username and password:
                    conn_str += f';UID={username};PWD={password}'
                else:
                    conn_str += ';Trusted_Connection=yes'

                self.conn = pyodbc.connect(conn_str)

            self.cursor = self.conn.cursor()

        except Exception as e:
            raise ConnectionError(f"Failed to connect to SQL Server: {str(e)}")
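
    # Illustrative example (assumed values, not from the original file): with
    # MSSQL_SERVER=localhost and no credentials in the environment, the string
    # built above is:
    #   DRIVER={ODBC Driver 17 for SQL Server};SERVER=localhost;Trusted_Connection=yes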

    def list_databases(self) -> List[str]:
        """List all accessible databases."""
        if not self.cursor:
            raise ConnectionError("Not connected to SQL Server")

        self.cursor.execute("SELECT name FROM sys.databases WHERE database_id > 4")  # Skip system DBs
        return [row.name for row in self.cursor.fetchall()]

    def analyze_database(self, database: str) -> Dict:
        """
        Analyze a specific database.

        Args:
            database: Name of the database to analyze

        Returns:
            Dict containing analysis of stored procedures, views, and functions
        """
        if not self.cursor:
            raise ConnectionError("Not connected to SQL Server")

        # Switch to specified database
        self.cursor.execute(f"USE [{database}]")

        return {
            'stored_procedures': self._analyze_stored_procedures(),
            'views': self._analyze_views(),
            'functions': self._analyze_functions()
        }

    def _analyze_stored_procedures(self) -> List[Dict]:
        """Analyze stored procedures in the current database."""
        self.cursor.execute("""
            SELECT
                OBJECT_SCHEMA_NAME(p.object_id) as schema_name,
                p.name,
                m.definition,
                p.create_date,
                p.modify_date
            FROM sys.procedures p
            INNER JOIN sys.sql_modules m ON p.object_id = m.object_id
            ORDER BY schema_name, p.name
        """)

        procedures = []
        for row in self.cursor.fetchall():
            proc_def = row.definition

            # Analyze the procedure
            proc_analysis = {
                'schema': row.schema_name,
                'name': row.name,
                'definition': proc_def,
                'metrics': {
                    'lines': len(proc_def.splitlines()),
                    'complexity': self._estimate_complexity(proc_def)
                },
                'parameters': self._extract_parameters(proc_def),
                'dependencies': self._extract_dependencies(proc_def),
                'todos': [],
                'comments': []
            }

            # Extract comments and TODOs
            comments, todos = self._extract_comments_and_todos(proc_def)
            proc_analysis['comments'] = comments
            proc_analysis['todos'] = todos

            procedures.append(proc_analysis)

        return procedures
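
    # Illustrative per-procedure record (placeholder values, not from a real run):
    #   {'schema': 'dbo', 'name': 'usp_GetOrders', 'definition': '...',
    #    'metrics': {'lines': 120, 'complexity': 18},
    #    'parameters': [...], 'dependencies': [...], 'comments': [...], 'todos': [...]}
    # _analyze_views and _analyze_functions below build the same shape
    # (views omit the 'parameters' key).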

    def _analyze_views(self) -> List[Dict]:
        """Analyze views in the current database."""
        self.cursor.execute("""
            SELECT
                OBJECT_SCHEMA_NAME(v.object_id) as schema_name,
                v.name,
                m.definition,
                v.create_date,
                v.modify_date
            FROM sys.views v
            INNER JOIN sys.sql_modules m ON v.object_id = m.object_id
            ORDER BY schema_name, v.name
        """)

        views = []
        for row in self.cursor.fetchall():
            view_def = row.definition

            # Analyze the view
            view_analysis = {
                'schema': row.schema_name,
                'name': row.name,
                'definition': view_def,
                'metrics': {
                    'lines': len(view_def.splitlines()),
                    'complexity': self._estimate_complexity(view_def)
                },
                'dependencies': self._extract_dependencies(view_def),
                'todos': [],
                'comments': []
            }

            # Extract comments and TODOs
            comments, todos = self._extract_comments_and_todos(view_def)
            view_analysis['comments'] = comments
            view_analysis['todos'] = todos

            views.append(view_analysis)

        return views

    def _analyze_functions(self) -> List[Dict]:
        """Analyze functions in the current database."""
        self.cursor.execute("""
            SELECT
                OBJECT_SCHEMA_NAME(f.object_id) as schema_name,
                f.name,
                m.definition,
                f.create_date,
                f.modify_date,
                f.type
            FROM sys.objects f
            INNER JOIN sys.sql_modules m ON f.object_id = m.object_id
            WHERE f.type IN ('FN', 'IF', 'TF') -- scalar, inline table-valued, multi-statement table-valued
            ORDER BY schema_name, f.name
        """)

        functions = []
        for row in self.cursor.fetchall():
            func_def = row.definition

            # Analyze the function
            func_analysis = {
                'schema': row.schema_name,
                'name': row.name,
                'definition': func_def,
                'metrics': {
                    'lines': len(func_def.splitlines()),
                    'complexity': self._estimate_complexity(func_def)
                },
                'parameters': self._extract_parameters(func_def),
                'dependencies': self._extract_dependencies(func_def),
                'todos': [],
                'comments': []
            }

            # Extract comments and TODOs
            comments, todos = self._extract_comments_and_todos(func_def)
            func_analysis['comments'] = comments
            func_analysis['todos'] = todos

            functions.append(func_analysis)

        return functions

    def analyze_file(self, file_path: Path) -> dict:
        """Analyze a SQL file."""
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()

        analysis = {
            'type': 'sql',
            'metrics': {
                'loc': len(content.splitlines()),
                'complexity': self._estimate_complexity(content)
            },
            'objects': [],
            'parameters': [],
            'comments': [],
            'todos': [],
            'dependencies': self._extract_dependencies(content)
        }

        # Extract SQL objects
        objects = self._extract_sql_objects(content)
        if objects:
            analysis['objects'] = objects

        # Extract and update parameters with comments
        params = self._extract_parameters(content)
        self._update_params_with_comments(params, content)
        if params:
            analysis['parameters'] = params

        # Extract comments and TODOs
        comments, todos = self._extract_comments_and_todos(content)
        analysis['comments'] = comments
        analysis['todos'] = todos

        return analysis
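
    # Illustrative shape of the returned analysis (placeholder values, not a real run):
    #   {'type': 'sql',
    #    'metrics': {'loc': 40, 'complexity': 8},
    #    'objects': [{'type': 'procedure', 'name': 'dbo.usp_GetOrders', ...}],
    #    'parameters': [{'name': 'CustomerId', 'data_type': 'INT'}],
    #    'comments': [...], 'todos': [...],
    #    'dependencies': ['dbo.Orders']}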

    def __del__(self):
        """Clean up database connections."""
        if self.cursor:
            self.cursor.close()
        if self.conn:
            self.conn.close()

    def _extract_sql_objects(self, content: str) -> List[dict]:
        """Extract SQL objects like procedures, functions, and views."""
        objects = []

        # Match CREATE/ALTER statements
        patterns = {
            'procedure': r'CREATE\s+(?:OR\s+ALTER\s+)?PROCEDURE\s+([^\s]+)',
            'function': r'CREATE\s+(?:OR\s+ALTER\s+)?FUNCTION\s+([^\s]+)',
            'view': r'CREATE\s+(?:OR\s+ALTER\s+)?VIEW\s+([^\s]+)'
        }

        for obj_type, pattern in patterns.items():
            matches = re.finditer(pattern, content, re.IGNORECASE)
            for match in matches:
                name = match.group(1)
                # Find the object's body
                start_pos = match.start()
                # Look for GO or end of file
                end_match = re.search(r'\bGO\b', content[start_pos:], re.IGNORECASE)
                if end_match:
                    end_pos = start_pos + end_match.start()
                    definition = content[start_pos:end_pos].strip()
                else:
                    definition = content[start_pos:].strip()

                objects.append({
                    'type': obj_type,
                    'name': name,
                    'definition': definition,
                    'loc': len(definition.splitlines()),
                    'complexity': self._estimate_complexity(definition)
                })

        return objects
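
    # Illustrative example (hypothetical script): given the batch
    #   CREATE PROCEDURE dbo.usp_Cleanup AS DELETE FROM dbo.Staging
    #   GO
    # this yields one entry with type 'procedure', name 'dbo.usp_Cleanup', and the
    # definition cut off at the GO batch separator.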

    def _extract_parameters(self, content: str) -> List[dict]:
        """Extract parameters from procedure or function definitions."""
        params = []
        # Find the procedure declaration
        proc_match = re.search(
            r'CREATE\s+(?:OR\s+ALTER\s+)?(?:PROCEDURE|FUNCTION)\s+([^\s]+)([\s\S]+?)AS\b',
            content,
            re.IGNORECASE
        )

        if proc_match:
            param_section = proc_match.group(2)
            # Extract each parameter line, handling multiline declarations
            param_lines = re.findall(
                r'@\w+\s+[^,@]+(?:\s*=\s*[^,]+)?(?=\s*,|\s*AS\b|\s*$)',
                param_section,
                re.IGNORECASE | re.DOTALL
            )

            for param_line in param_lines:
                # Extract individual parameter components
                param_match = re.match(
                    r'@(\w+)\s+([^=\s]+(?:\([^)]*\))?)\s*(?:=\s*([^,\s][^,]*)?)?',
                    param_line.strip()
                )

                if param_match:
                    name, data_type, default = param_match.groups()
                    param_info = {
                        'name': name,
                        'data_type': data_type.strip()
                    }

                    if default:
                        param_info['default'] = default.strip()

                    # Look for inline comment on the same line
                    comment_match = re.search(r'--\s*(.*?)(?:\r?\n|$)', param_line)
                    if comment_match:
                        param_info['description'] = comment_match.group(1).strip()

                    params.append(param_info)

        return params
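
    # Illustrative example (hypothetical declaration): the header line
    #   @CustomerId INT  -- target customer
    # is extracted roughly as
    #   {'name': 'CustomerId', 'data_type': 'INT', 'description': 'target customer'}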

    def _update_params_with_comments(self, params: List[dict], content: str) -> None:
        """Update parameter documentation from nearby comments."""
        lines = content.splitlines()
        for line in lines:
            if '--' in line and any(param['name'] in line for param in params):
                comment = line[line.index('--') + 2:].strip()
                param_name = next(
                    (param['name'] for param in params if param['name'] in line),
                    None
                )
                if param_name:
                    param = next(p for p in params if p['name'] == param_name)
                    if 'description' not in param:
                        param['description'] = comment

    def _extract_dependencies(self, content: str) -> List[str]:
        """Extract table and view dependencies."""
        deps = set()

        # Define patterns for table references
        patterns = [
            # FROM, JOIN, UPDATE, etc. followed by a table name
            r'(?:FROM|JOIN|INTO|UPDATE)\s+([a-zA-Z_]\w*(?:\.[a-zA-Z_]\w*)?)\b(?!\s*[=@])',
            # INSERT INTO pattern
            r'INSERT\s+INTO\s+([a-zA-Z_]\w*(?:\.[a-zA-Z_]\w*)?)\b',
            # REFERENCES in constraints
            r'REFERENCES\s+([a-zA-Z_]\w*(?:\.[a-zA-Z_]\w*)?)\b'
        ]

        # Words that should not be treated as table names
        excluded_words = {
            'null', 'select', 'where', 'group', 'order', 'having',
            'exists', 'between', 'like', 'in', 'is', 'not', 'and', 'or',
            'operation', 'existing'  # common identifiers that are not tables
        }

        for pattern in patterns:
            for match in re.finditer(pattern, content, re.IGNORECASE):
                table_name = match.group(1).strip()
                if table_name.lower() not in excluded_words:
                    deps.add(table_name)

        return sorted(deps)
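
    # Illustrative example (hypothetical query): for
    #   SELECT o.Id FROM dbo.Orders o JOIN dbo.Customers c ON o.CustomerId = c.Id
    # the result is ['dbo.Customers', 'dbo.Orders'].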

    def _extract_comments_and_todos(self, content: str) -> tuple:
        """Extract comments and TODOs from SQL code."""
        comments = []
        todos = []

        # Match inline comments and block comments
        patterns = [
            (r'--([^\n]+)', False),    # Inline comments
            (r'/\*[\s\S]*?\*/', True)  # Block comments
        ]

        for pattern, is_multiline in patterns:
            for match in re.finditer(pattern, content):
                comment = match.group()
                if is_multiline:
                    comment = comment.strip('/*').strip('*/')
                else:
                    comment = comment.strip('--')
                comment = comment.strip()

                # Skip empty comments and parameter comments
                if not comment or comment.startswith('@'):
                    continue

                line_num = content[:match.start()].count('\n') + 1

                if any(marker in comment.upper()
                       for marker in ['TODO', 'FIXME', 'XXX']):
                    todos.append({
                        'text': comment,
                        'line': line_num
                    })
                else:
                    comments.append({
                        'text': comment,
                        'line': line_num
                    })

        return comments, todos
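
    # Illustrative example (hypothetical snippet): the line
    #   -- TODO: add a covering index
    # lands in todos as {'text': 'TODO: add a covering index', 'line': <line>},
    # while ordinary comments go to the comments list.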

    def _estimate_complexity(self, content: str) -> int:
        """Estimate SQL complexity based on various factors."""
        complexity = 0
        content_lower = content.lower()

        # Control flow complexity
        complexity += content_lower.count('if ') * 2
        complexity += content_lower.count('else ') * 2
        complexity += content_lower.count('case ') * 2
        complexity += content_lower.count('while ') * 3
        complexity += content_lower.count('cursor') * 4

        # Query complexity
        complexity += content_lower.count('join ') * 2
        complexity += content_lower.count('where ') * 2
        complexity += content_lower.count('group by ') * 2
        complexity += content_lower.count('having ') * 3
        complexity += content_lower.count('union ') * 3

        # Transaction complexity
        complexity += content_lower.count('transaction') * 2
        complexity += content_lower.count('try') * 2
        complexity += content_lower.count('catch') * 2

        return complexity
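

# Illustrative usage sketch (not part of the analyzer itself): assumes a reachable
# SQL Server instance and that the MSSQL_* environment variables (or a connection
# string) are set.
if __name__ == '__main__':
    analyzer = SQLServerAnalyzer()
    analyzer.connect()  # falls back to MSSQL_SERVER / MSSQL_USERNAME / MSSQL_PASSWORD
    for db in analyzer.list_databases():
        result = analyzer.analyze_database(db)
        print(f"{db}: {len(result['stored_procedures'])} procedures, "
              f"{len(result['views'])} views, {len(result['functions'])} functions")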