#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Timestamp: "2025-10-11 23:58:17 (ywatanabe)"
# File: /home/ywatanabe/proj/scitex_repo/src/scitex/scholar/extra/JournalMetrics.py
# ----------------------------------------
from __future__ import annotations
import os
__FILE__ = "./src/scitex/scholar/extra/JournalMetrics.py"
__DIR__ = os.path.dirname(__FILE__)
# ----------------------------------------
__FILE__ = __file__
"""
Functionalities:
- Retrieves journal impact factors and quartiles
- Provides standalone journal metrics lookup
- Caches results for performance optimization
Dependencies:
- packages:
- impact_factor
Input:
- Journal names as strings
Output:
- Dictionary containing impact factor and quartile data
"""
"""Imports"""
from functools import lru_cache
from typing import Dict, Optional
from scitex_logging import getLogger
from .jcr.ImpactFactorJCREngine import ImpactFactorJCREngine
logger = getLogger(__name__)
"""Parameters"""
"""Functions & Classes"""
[docs]
class ImpactFactorEngine:
"""
Impact factor service - finds journal metrics from JCR database.
Uses JCR database lookup with caching for performance.
"""
[docs]
def __init__(self, cache_size: int = 1000):
"""Initialize with optional cache size."""
self.name = self.__class__.__name__
self.jcr_engine = ImpactFactorJCREngine()
self.get_metrics = lru_cache(maxsize=cache_size)(self._get_metrics_uncached)
[docs]
def _get_jcr_year(self) -> str:
"""Extract JCR year from database or package metadata.
Returns "Source Unknown" if the year can't be determined. Any
underlying error is logged at debug level so callers can distinguish
a truly unknown year from a misconfigured DB.
"""
try:
import sqlite3
with sqlite3.connect(self.jcr_engine.dbfile) as conn:
cursor = conn.cursor()
cursor.execute("SELECT name FROM sqlite_master WHERE type='table'")
tables = [row[0] for row in cursor.fetchall()]
if "metadata" in tables:
cursor.execute(
"SELECT value FROM metadata WHERE key='year' OR key='jcr_year'"
)
year_result = cursor.fetchone()
if year_result:
return f"JCR {year_result[0]}"
import re
db_path = str(self.jcr_engine.dbfile)
year_match = re.search(r"20\d{2}", db_path)
if year_match:
return f"JCR {year_match.group()}"
except Exception as exc:
logger.debug(
f"ImpactFactorEngine: JCR year lookup failed "
f"({type(exc).__name__}: {exc}); returning 'Source Unknown'"
)
return "Source Unknown"
[docs]
def _get_metrics_uncached(self, journal_name: str) -> Optional[Dict]:
"""Get journal metrics without caching."""
if not self.jcr_engine or not journal_name:
return None
try:
results = self.jcr_engine.search(journal_name)
if results:
result = results[0]
return {
"impact_factor": float(result.get("factor", 0)),
"quartile": result.get("jcr", "Unknown"),
"source": self._get_jcr_year(),
}
except Exception:
pass
return None
[docs]
def get_database_info(self) -> Dict:
"""Get information about the impact factor database."""
if not self.jcr_engine:
return {"error": "Database not available"}
import sqlite3
db_path = self.jcr_engine.dbfile
with sqlite3.connect(db_path) as conn:
cursor = conn.cursor()
cursor.execute("SELECT name FROM sqlite_master WHERE type='table'")
tables = [row[0] for row in cursor.fetchall()]
info = {
"database_path": str(db_path),
"tables": tables,
"total_journals": 0,
"data_year": self._get_jcr_year(),
}
if tables:
main_table = tables[0]
cursor.execute(f"SELECT COUNT(*) FROM {main_table}")
info["total_journals"] = cursor.fetchone()[0]
cursor.execute(f"PRAGMA table_info({main_table})")
columns = [row[1] for row in cursor.fetchall()]
info["columns"] = columns
cursor.execute(f"SELECT * FROM {main_table} LIMIT 3")
sample_data = cursor.fetchall()
info["sample_data"] = sample_data
return info
[docs]
def get_journal_metrics(journal_name: str) -> Optional[Dict]:
"""Standalone function to get journal metrics.
Parameters
----------
journal_name : str
Name of the journal
Returns
-------
Optional[Dict]
Dictionary with impact_factor, quartile, and source keys
Example
-------
>>> metrics = get_journal_metrics("Nature")
>>> print(metrics["impact_factor"])
64.8
"""
engine = ImpactFactorEngine()
return engine.get_metrics(journal_name)
if __name__ == "__main__":
def main():
"""Demonstrate journal metrics lookup."""
metrics_instance = ImpactFactorEngine()
# Show database info
print("Database Information")
print("=" * 50)
db_info = metrics_instance.get_database_info()
for key, value in db_info.items():
print(f"{key}: {value}")
print("\nJournal Metrics Lookup Demo")
print("=" * 50)
test_journals = ["Nature", "Science", "Cell"]
for journal in test_journals:
print(f"\nJournal: {journal}")
metrics = get_journal_metrics(journal)
if metrics:
for key, value in metrics.items():
print(f" {key}: {value}")
else:
print(" No metrics found")
main()
# python -m scitex_scholar.extra.JournalMetrics
# EOF