Source code for scitex_scholar.impact_factor.ImpactFactorEngine

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Timestamp: "2025-10-11 23:58:17 (ywatanabe)"
# File: /home/ywatanabe/proj/scitex_repo/src/scitex/scholar/extra/JournalMetrics.py
# ----------------------------------------
from __future__ import annotations

import os

__FILE__ = "./src/scitex/scholar/extra/JournalMetrics.py"
__DIR__ = os.path.dirname(__FILE__)
# ----------------------------------------

__FILE__ = __file__

"""
Functionalities:
- Retrieves journal impact factors and quartiles
- Provides standalone journal metrics lookup
- Caches results for performance optimization

Dependencies:
- packages:
  - impact_factor

Input:
- Journal names as strings

Output:
- Dictionary containing impact factor and quartile data
"""

"""Imports"""
from functools import lru_cache
from typing import Dict, Optional

from scitex_logging import getLogger

from .jcr.ImpactFactorJCREngine import ImpactFactorJCREngine

logger = getLogger(__name__)

"""Parameters"""

"""Functions & Classes"""


[docs] class ImpactFactorEngine: """ Impact factor service - finds journal metrics from JCR database. Uses JCR database lookup with caching for performance. """
[docs] def __init__(self, cache_size: int = 1000): """Initialize with optional cache size.""" self.name = self.__class__.__name__ self.jcr_engine = ImpactFactorJCREngine() self.get_metrics = lru_cache(maxsize=cache_size)(self._get_metrics_uncached)
[docs] def _get_jcr_year(self) -> str: """Extract JCR year from database or package metadata. Returns "Source Unknown" if the year can't be determined. Any underlying error is logged at debug level so callers can distinguish a truly unknown year from a misconfigured DB. """ try: import sqlite3 with sqlite3.connect(self.jcr_engine.dbfile) as conn: cursor = conn.cursor() cursor.execute("SELECT name FROM sqlite_master WHERE type='table'") tables = [row[0] for row in cursor.fetchall()] if "metadata" in tables: cursor.execute( "SELECT value FROM metadata WHERE key='year' OR key='jcr_year'" ) year_result = cursor.fetchone() if year_result: return f"JCR {year_result[0]}" import re db_path = str(self.jcr_engine.dbfile) year_match = re.search(r"20\d{2}", db_path) if year_match: return f"JCR {year_match.group()}" except Exception as exc: logger.debug( f"ImpactFactorEngine: JCR year lookup failed " f"({type(exc).__name__}: {exc}); returning 'Source Unknown'" ) return "Source Unknown"
[docs] def _get_metrics_uncached(self, journal_name: str) -> Optional[Dict]: """Get journal metrics without caching.""" if not self.jcr_engine or not journal_name: return None try: results = self.jcr_engine.search(journal_name) if results: result = results[0] return { "impact_factor": float(result.get("factor", 0)), "quartile": result.get("jcr", "Unknown"), "source": self._get_jcr_year(), } except Exception: pass return None
[docs] def get_database_info(self) -> Dict: """Get information about the impact factor database.""" if not self.jcr_engine: return {"error": "Database not available"} import sqlite3 db_path = self.jcr_engine.dbfile with sqlite3.connect(db_path) as conn: cursor = conn.cursor() cursor.execute("SELECT name FROM sqlite_master WHERE type='table'") tables = [row[0] for row in cursor.fetchall()] info = { "database_path": str(db_path), "tables": tables, "total_journals": 0, "data_year": self._get_jcr_year(), } if tables: main_table = tables[0] cursor.execute(f"SELECT COUNT(*) FROM {main_table}") info["total_journals"] = cursor.fetchone()[0] cursor.execute(f"PRAGMA table_info({main_table})") columns = [row[1] for row in cursor.fetchall()] info["columns"] = columns cursor.execute(f"SELECT * FROM {main_table} LIMIT 3") sample_data = cursor.fetchall() info["sample_data"] = sample_data return info
[docs] def get_journal_metrics(journal_name: str) -> Optional[Dict]: """Standalone function to get journal metrics. Parameters ---------- journal_name : str Name of the journal Returns ------- Optional[Dict] Dictionary with impact_factor, quartile, and source keys Example ------- >>> metrics = get_journal_metrics("Nature") >>> print(metrics["impact_factor"]) 64.8 """ engine = ImpactFactorEngine() return engine.get_metrics(journal_name)
if __name__ == "__main__": def main(): """Demonstrate journal metrics lookup.""" metrics_instance = ImpactFactorEngine() # Show database info print("Database Information") print("=" * 50) db_info = metrics_instance.get_database_info() for key, value in db_info.items(): print(f"{key}: {value}") print("\nJournal Metrics Lookup Demo") print("=" * 50) test_journals = ["Nature", "Science", "Cell"] for journal in test_journals: print(f"\nJournal: {journal}") metrics = get_journal_metrics(journal) if metrics: for key, value in metrics.items(): print(f" {key}: {value}") else: print(" No metrics found") main() # python -m scitex_scholar.extra.JournalMetrics # EOF