Source code for scitex_db._check_health

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Timestamp: "2025-09-10 08:05:21 (ywatanabe)"
# File: /ssh:sp:/home/ywatanabe/proj/scitex_repo/src/scitex/db/_check_health.py
# ----------------------------------------
from __future__ import annotations
import os
__FILE__ = __file__
__DIR__ = os.path.dirname(__FILE__)
# ----------------------------------------

import sqlite3
from typing import Any, Dict, List

import pandas as pd


[docs] def check_health( db_path: str, verbose: bool = True, fix_issues: bool = False ) -> Dict[str, Any]: """Comprehensive health check for SQLite database Parameters ---------- db_path : str Path to database file verbose : bool, default True Print detailed results fix_issues : bool, default False Attempt to fix detected issues Returns ------- dict Health check results """ results = { "database_path": db_path, "exists": False, "accessible": False, "integrity": "UNKNOWN", "tables": [], "issues": [], "stats": {}, "recommendations": [], "loadability": {"rows": False, "arrays": False, "blobs": False}, } # Check file existence if not os.path.exists(db_path): results["issues"].append(f"Database file does not exist: {db_path}") return results results["exists"] = True results["stats"]["file_size_mb"] = os.path.getsize(db_path) / (1024 * 1024) try: with sqlite3.connect(db_path, timeout=10.0) as conn: cursor = conn.cursor() results["accessible"] = True # Integrity check integrity_result = cursor.execute( "PRAGMA integrity_check" ).fetchall() results["integrity"] = ( "OK" if integrity_result[0][0] == "ok" else "CORRUPTED" ) if results["integrity"] == "CORRUPTED": results["issues"].append("Database integrity check failed") results["recommendations"].append( "Run VACUUM or restore from backup" ) # Get table information cursor.execute("SELECT name FROM sqlite_master WHERE type='table'") table_names = [row[0] for row in cursor.fetchall()] results["stats"]["table_count"] = len(table_names) total_rows = 0 for table_name in table_names: cursor.execute(f"SELECT COUNT(*) FROM {table_name}") row_count = cursor.fetchone()[0] total_rows += row_count results["tables"].append( {"name": table_name, "row_count": row_count} ) results["stats"]["total_rows"] = total_rows # Check for empty tables empty_tables = [ t for t in results["tables"] if t["row_count"] == 0 ] if empty_tables: results["issues"].append( f"{len(empty_tables)} empty tables found" ) # Check database settings cursor.execute("PRAGMA journal_mode") journal_mode = cursor.fetchone()[0] results["stats"]["journal_mode"] = journal_mode cursor.execute("PRAGMA page_size") page_size = cursor.fetchone()[0] results["stats"]["page_size"] = page_size cursor.execute("PRAGMA page_count") page_count = cursor.fetchone()[0] results["stats"]["page_count"] = page_count # Check for foreign key violations cursor.execute("PRAGMA foreign_key_check") fk_violations = cursor.fetchall() if fk_violations: results["issues"].append( f"{len(fk_violations)} foreign key violations" ) results["recommendations"].append( "Fix foreign key constraints" ) # Performance recommendations if results["stats"]["total_rows"] > 10000: cursor.execute( "SELECT COUNT(*) FROM sqlite_master WHERE type='index'" ) index_count = cursor.fetchone()[0] if index_count < len(table_names): results["recommendations"].append( "Consider adding indexes for better performance" ) # Auto-vacuum check cursor.execute("PRAGMA auto_vacuum") auto_vacuum = cursor.fetchone()[0] if auto_vacuum == 0 and results["stats"]["file_size_mb"] > 100: results["recommendations"].append( "Consider enabling auto_vacuum for large databases" ) # Attempt fixes if requested if fix_issues and results["issues"]: if "CORRUPTED" in str(results["issues"]): if verbose: print("Attempting to fix corruption with VACUUM...") try: cursor.execute("VACUUM") conn.commit() results["recommendations"].append( "Database vacuumed successfully" ) except sqlite3.Error as ee: results["issues"].append(f"VACUUM failed: {ee}") except sqlite3.Error as ee: results["issues"].append(f"Database connection failed: {ee}") except Exception as ee: results["issues"].append(f"Unexpected error: {ee}") # Test loadability results["loadability"]["rows"] = is_rows_loadable(db_path, verbose=verbose) results["loadability"]["arrays"] = is_arrays_loadable( db_path, verbose=verbose ) results["loadability"]["blobs"] = is_blobs_loadable( db_path, verbose=verbose ) # Add loadability issues if not results["loadability"]["rows"]: results["issues"].append("Rows are not loadable") if not results["loadability"]["arrays"]: results["issues"].append("Arrays are not loadable") if not results["loadability"]["blobs"]: results["issues"].append("Blobs are not loadable") # Generate summary health_score = 100 health_score -= len(results["issues"]) * 20 health_score = max(0, health_score) results["health_score"] = health_score results["status"] = ( "HEALTHY" if health_score >= 80 else "ISSUES_FOUND" if health_score >= 50 else "CRITICAL" ) if verbose: print(f"\nDatabase Health Check: {db_path}") print(f"Status: {results['status']} (Score: {health_score}/100)") print(f"File size: {results['stats'].get('file_size_mb', 0):.2f} MB") print(f"Tables: {results['stats'].get('table_count', 0)}") print(f"Total rows: {results['stats'].get('total_rows', 0)}") # Loadability status loadability = results["loadability"] print( f"Loadability - Rows: {'✓' if loadability['rows'] else '✗'}, " f"Arrays: {'✓' if loadability['arrays'] else '✗'}, " f"Blobs: {'✓' if loadability['blobs'] else '✗'}" ) if results["issues"]: print(f"\nIssues found ({len(results['issues'])}):") for issue in results["issues"]: print(f" - {issue}") if results["recommendations"]: print(f"\nRecommendations ({len(results['recommendations'])}):") for rec in results["recommendations"]: print(f" - {rec}") return results
def is_rows_loadable( db_path: str, table_name: str = None, verbose: bool = False ) -> bool: """Check if database/table is loadable without errors Parameters ---------- db_path : str Path to database file table_name : str, optional Specific table to test. If None, tests all tables verbose : bool, default False Print detailed error information Returns ------- bool True if loadable, False otherwise """ import scitex as stx try: with stx.io.load(db_path) as db: if table_name: db.get_rows(table_name, limit=1) return True else: table_names = db.get_table_names() for tname in table_names: db.get_rows(tname, limit=1) return True except Exception as ee: if verbose: print(f"Load failed for {db_path}/{table_name}: {ee}") return False def is_arrays_loadable( db_path: str, table_name: str = None, verbose: bool = False ) -> bool: """Check if arrays in database/table are loadable Parameters ---------- db_path : str Path to database file table_name : str, optional Specific table to test. If None, tests all tables verbose : bool, default False Print detailed error information Returns ------- bool True if arrays loadable, False otherwise """ import scitex as stx try: with stx.io.load(db_path) as db: if table_name: db.load_arrays(table_name) return True else: table_names = db.get_table_names() for tname in table_names: try: db.load_arrays(tname) except: continue return True except Exception as ee: if verbose: print(f"Array load failed for {db_path}/{table_name}: {ee}") return False def is_blobs_loadable( db_path: str, table_name: str = None, verbose: bool = False ) -> bool: """Check if blobs in database/table are loadable Parameters ---------- db_path : str Path to database file table_name : str, optional Specific table to test. If None, tests all tables verbose : bool, default False Print detailed error information Returns ------- bool True if blobs loadable, False otherwise """ import scitex as stx try: with stx.io.load(db_path) as db: if table_name: db.load_blob(table_name) return True else: table_names = db.get_table_names() for tname in table_names: try: db.load_blob(tname) except: continue return True except Exception as ee: if verbose: print(f"Blob load failed for {db_path}/{table_name}: {ee}") return False
[docs] def batch_health_check( db_paths: List[str], verbose: bool = False, fix_issues: bool = False ) -> Dict[str, Dict[str, Any]]: """ Run health check on multiple databases Parameters ---------- db_paths : list of str List of database paths verbose : bool, default False Print results for each database fix_issues : bool, default False Attempt to fix issues Returns ------- dict Results for each database """ results = {} print(f"Running health check on {len(db_paths)} databases...") for db_path in db_paths: if verbose: print(f"\nChecking: {db_path}") results[db_path] = check_health( db_path=db_path, verbose=verbose, fix_issues=fix_issues ) # Summary healthy = sum(1 for r in results.values() if r["status"] == "HEALTHY") issues = sum(1 for r in results.values() if r["status"] == "ISSUES_FOUND") critical = sum(1 for r in results.values() if r["status"] == "CRITICAL") print(f"\nBatch Health Check Summary:") print(f" Healthy: {healthy}") print(f" Issues: {issues}") print(f" Critical: {critical}") return results
# EOF