"""TcalORCA"""
import functools
import os
import re
from pathlib import Path
from typing import List, Optional, Tuple
import numpy as np
from tcal.tcal import Tcal
print = functools.partial(print, flush=True)
[docs]
class TcalORCA(Tcal):
"""Calculate transfer integrals using ORCA via OPI (orca-pi)."""
def __init__(
self,
file: str,
monomer1_atom_num: int = -1,
method: str = 'B3LYP/6-31G(d,p)',
charge: int = 0,
spin: int = 0,
ncore: int = 4,
max_memory_mb: int = 16 * 1024, # GB -> MB
open_mpi_path: Optional[str] = None,
) -> None:
"""Inits TcalORCA.
Parameters
----------
file : str
A path of xyz file.
monomer1_atom_num : int
Number of atoms in the first monomer. default -1
If -1, it is half the number of atoms in the dimer.
method : str
Calculation method and basis set in "METHOD/BASIS" format. default 'B3LYP/6-31G(d,p)'
charge : int
Molecular charge. default 0
spin : int
Number of unpaired electrons (2S = n_alpha - n_beta). default 0 (closed-shell singlet)
ncore : int
Number of parallel processes for ORCA (%pal nprocs). default 4
max_memory_mb : int
Maximum memory in MB for ORCA (%maxcore). default 16384 (16 GB)
open_mpi_path : str, optional
Path to the OpenMPI installation directory (e.g. '/usr/lib/x86_64-linux-gnu/openmpi').
Required when OpenMPI is not in $PATH and the OPI_MPI environment variable is not set.
Sets OPI_MPI temporarily during ORCA execution.
"""
if spin != 0:
raise NotImplementedError('TcalORCA only supports closed-shell (spin=0) molecules.')
super().__init__(file, monomer1_atom_num)
self._method = method
self._charge = charge
self._spin = spin
self._ncore = ncore
self._max_memory_mb = max_memory_mb
self._open_mpi_path = open_mpi_path
self.extension_log = '.out'
self._output1 = None # cache for monomer 1 OPI Output object
self._output2 = None # cache for monomer 2 OPI Output object
self._output_d = None # cache for dimer OPI Output object
[docs]
def run_orca(self, skip_monomer_num: List[int] = [0]) -> None:
"""Run ORCA calculations for monomers and dimer.
Parameters
----------
skip_monomer_num : list[int]
If 1 is in the list, skip 1st monomer calculation.
If 2 is in the list, skip 2nd monomer calculation.
If 3 is in the list, skip dimer calculation.
"""
atoms_dimer, atoms_m1, atoms_m2 = self._parse_xyz()
functional, basis = self._method.split('/', 1)
working_dir = Path(self._base_path).parent.resolve()
stem = Path(self._base_path).name
if 1 in skip_monomer_num:
print('skip 1st monomer calculation')
else:
self._output1 = self._run_orca_calculation(
atoms=atoms_m1,
functional=functional,
basis=basis,
basename=f'{stem}_m1',
working_dir=working_dir,
label='1st monomer',
)
if 2 in skip_monomer_num:
print('skip 2nd monomer calculation')
else:
self._output2 = self._run_orca_calculation(
atoms=atoms_m2,
functional=functional,
basis=basis,
basename=f'{stem}_m2',
working_dir=working_dir,
label='2nd monomer',
)
if 3 in skip_monomer_num:
print('skip dimer calculation')
else:
self._output_d = self._run_orca_calculation(
atoms=atoms_dimer,
functional=functional,
basis=basis,
basename=stem,
working_dir=working_dir,
label='dimer',
)
[docs]
def read_monomer1(
self,
is_matrix: bool = False,
output_matrix: bool = False,
) -> None:
"""Extract MO coefficients from ORCA result of monomer 1.
Parameters
----------
is_matrix : bool
If True, print MO coefficients. default False
output_matrix : bool
If True, output MO coefficients to CSV. default False
"""
print(f'reading {self._base_path}_m1.out')
working_dir = Path(self._base_path).parent.resolve()
stem = Path(self._base_path).name
output = self._output1 or self._load_output(f'{stem}_m1', working_dir)
mos = output.get_mos()['mo']
# shape: (n_bsuse1, n_basis1) — rows = MOs, columns = AO basis functions
mo_coeff = np.array([mo.mocoefficients for mo in mos])
self.n_bsuse1 = mo_coeff.shape[0]
self.n_basis1 = mo_coeff.shape[1]
nel, _ = output.get_nelectrons()
self.n_elect1 = nel // 2
atoms_list = output._safe_get('results_gbw', 0, 'molecule', 'atoms') or []
self.n_atoms1 = len(atoms_list)
self.mo1 = mo_coeff
if is_matrix:
print(' *** Alpha MO coefficients *** ')
self.print_matrix(self.mo1)
if output_matrix:
self.output_csv(f'{self._base_path}_mo1.csv', self.mo1)
[docs]
def read_monomer2(
self,
is_matrix: bool = False,
output_matrix: bool = False,
) -> None:
"""Extract MO coefficients from ORCA result of monomer 2.
Parameters
----------
is_matrix : bool
If True, print MO coefficients. default False
output_matrix : bool
If True, output MO coefficients to CSV. default False
"""
print(f'reading {self._base_path}_m2.out')
working_dir = Path(self._base_path).parent.resolve()
stem = Path(self._base_path).name
output = self._output2 or self._load_output(f'{stem}_m2', working_dir)
mos = output.get_mos()['mo']
# shape: (n_bsuse2, n_basis2) — rows = MOs, columns = AO basis functions
mo_coeff = np.array([mo.mocoefficients for mo in mos])
self.n_bsuse2 = mo_coeff.shape[0]
self.n_basis2 = mo_coeff.shape[1]
nel, _ = output.get_nelectrons()
self.n_elect2 = nel // 2
atoms_list = output._safe_get('results_gbw', 0, 'molecule', 'atoms') or []
self.n_atoms2 = len(atoms_list)
self.mo2 = mo_coeff
if is_matrix:
print(' *** Alpha MO coefficients *** ')
self.print_matrix(self.mo2)
if output_matrix:
self.output_csv(f'{self._base_path}_mo2.csv', self.mo2)
[docs]
def read_dimer(
self,
is_matrix: bool = False,
output_matrix: bool = False,
) -> None:
"""Extract overlap and Fock matrix from ORCA result of dimer.
Parameters
----------
is_matrix : bool
If True, print overlap and Fock matrices. default False
output_matrix : bool
If True, output overlap and Fock matrices to CSV. default False
"""
print(f'reading {self._base_path}.out')
working_dir = Path(self._base_path).parent.resolve()
stem = Path(self._base_path).name
output = self._output_d or self._load_output(stem, working_dir)
self.n_basis_d = output.get_nbf()
nel, _ = output.get_nelectrons()
self.n_elect_d = nel // 2
# Step 1: Restore default GBW JSON (MO data, no matrices).
# orca_2json with a config dict produces a matrix-only JSON that omits
# MolecularOrbitals/OrbitalLabels. Passing an empty config recreates the
# default JSON which always includes MO data.
output.recreate_gbw_results({})
# Step 2: Extract AO labels and MO data while the default JSON is active.
self._build_ao_labels_from_orca(output)
mos = output.get_mos()['mo']
C = np.array([mo.mocoefficients for mo in mos]).T # (n_basis_d, n_mos)
mo_energies = np.array([mo.orbitalenergy for mo in mos]) # Hartree
# Step 3: Recreate the GBW JSON with all needed matrices in ONE call.
# This overwrites the MO data in the in-memory JSON, which is fine because
# we already stored C and mo_energies above.
_matrix_config = {"1elIntegrals": ["S", "H"], "FockMatrix": ["F"]}
output.recreate_gbw_results(_matrix_config)
# Step 4: Read matrices from the now-active matrix-containing JSON.
self.overlap = output.get_int_overlap()
h_core = output.get_int_hcore()
g_mat = output.get_int_f()
# Step 5: Build full Fock/KS matrix: F = H_core + G
# get_int_f() returns only the two-electron part G (J - αK + V_XC).
# Fallback: reconstruct F_AO = S C ε C^T S from canonical MOs.
if h_core is not None and g_mat is not None:
self.fock = h_core + g_mat
else:
self.fock = self.overlap @ C @ np.diag(mo_energies) @ C.T @ self.overlap
# Extend monomer MOs to dimer basis by zero-padding.
# mo1/mo2 are already (n_bsuse, n_basis) so no transpose is needed,
# unlike TcalPySCF which stores (n_basis, n_bsuse) and transposes here.
zeros1 = np.zeros((self.n_bsuse1, self.n_basis2))
self.mo1 = np.hstack([self.mo1, zeros1]) # (n_bsuse1, n_basis_d)
zeros2 = np.zeros((self.n_bsuse2, self.n_basis1))
self.mo2 = np.hstack([zeros2, self.mo2]) # (n_bsuse2, n_basis_d)
if is_matrix:
print()
print(' *** Overlap *** ')
self.print_matrix(self.overlap)
print()
print(' *** Fock matrix (alpha) *** ')
self.print_matrix(self.fock)
if output_matrix:
self.output_csv(f'{self._base_path}_overlap.csv', self.overlap)
self.output_csv(f'{self._base_path}_fock.csv', self.fock)
[docs]
def create_cube_file(self, resolution: int = 40) -> None:
"""Create cube files for NHOMO, HOMO, LUMO, NLUMO of monomers 1 and 2.
Parameters
----------
resolution : int
Grid resolution for orca_plot. Higher values produce smoother plots
but take longer. default 40
"""
working_dir = Path(self._base_path).parent.resolve()
stem = Path(self._base_path).name
for monomer_num, (output_cache, monomer_stem) in enumerate([
(self._output1, f'{stem}_m1'),
(self._output2, f'{stem}_m2'),
], start=1):
output = output_cache or self._load_output(monomer_stem, working_dir)
nel, _ = output.get_nelectrons()
n_occ = nel // 2
orbitals = [
('NHOMO', n_occ - 2),
('HOMO', n_occ - 1),
('LUMO', n_occ),
('NLUMO', n_occ + 1),
]
ordinal = '1st' if monomer_num == 1 else '2nd'
print(f'generating cube files for {ordinal} monomer')
for label, idx in orbitals:
cube_out = output.plot_mo(idx, resolution=resolution)
if cube_out is None:
print(f'WARNING: failed to generate cube file for {label}')
continue
target = working_dir / f'{monomer_stem}_{label}.cube'
cube_out.path.rename(target)
print(f' {target}')
print(f'cube file of the {ordinal} monomer created')
def _parse_xyz(self) -> Tuple[List, List, List]:
"""Parse XYZ file and split into dimer, monomer 1, and monomer 2 atoms.
Returns
-------
tuple
(atoms_dimer, atoms_m1, atoms_m2) where each is a list of (symbol, (x, y, z))
"""
atoms_dimer = []
with open(f'{self._base_path}.xyz', 'r', encoding='utf-8') as f:
f.readline() # skip atom count line
f.readline() # skip comment line
while True:
line = f.readline()
if not line:
break
parts = line.strip().split()
if len(parts) == 4:
symbol = parts[0]
x, y, z = map(float, parts[1:4])
atoms_dimer.append((symbol, (x, y, z)))
if self.monomer1_atom_num == -1:
n_m1 = len(atoms_dimer) // 2
else:
n_m1 = self.monomer1_atom_num
return atoms_dimer, atoms_dimer[:n_m1], atoms_dimer[n_m1:]
def _run_orca_calculation(
self,
atoms: List,
functional: str,
basis: str,
basename: str,
working_dir: Path,
label: str,
):
"""Set up and run an ORCA single-point calculation via OPI.
Parameters
----------
atoms : list
List of (symbol, (x, y, z)) tuples.
functional : str
DFT functional name (e.g. 'B3LYP') or 'HF'.
basis : str
Basis set name (e.g. '6-31G(d,p)').
basename : str
Basename for ORCA input/output files.
working_dir : Path
Directory where ORCA files will be written.
label : str
Human-readable label for progress messages.
Returns
-------
Output
OPI Output object with parsed results.
"""
from opi.core import Calculator
from opi.input.structures.structure import Structure
# Write a temporary XYZ file for the structure
xyz_path = working_dir / f'{basename}_input.xyz'
with open(xyz_path, 'w', encoding='utf-8') as f:
f.write(f'{len(atoms)}\n\n')
for symbol, (x, y, z) in atoms:
f.write(f'{symbol} {x} {y} {z}\n')
structure = Structure.from_xyz(
xyz_path,
charge=self._charge,
multiplicity=self._spin + 1,
)
calc = Calculator(basename=basename, working_dir=working_dir, version_check=False)
calc.structure = structure
calc.input.add_arbitrary_string(f'! {functional} {basis} SP\n')
calc.input.add_arbitrary_string(f'%pal nprocs {self._ncore} end\n')
calc.input.add_arbitrary_string(f'%maxcore {self._max_memory_mb}\n')
print(f'running {label} calculation')
if self._open_mpi_path is not None:
_prev_opi_mpi = os.environ.get('OPI_MPI')
os.environ['OPI_MPI'] = self._open_mpi_path
try:
calc.write_and_run()
finally:
if self._open_mpi_path is not None:
if _prev_opi_mpi is None:
del os.environ['OPI_MPI']
else:
os.environ['OPI_MPI'] = _prev_opi_mpi
output = calc.get_output()
output.parse()
if not output.terminated_normally():
print(f'WARNING: {label} calculation did not terminate normally.')
if not output.scf_converged():
print(f'WARNING: {label} SCF did not converge.')
else:
print(f'{label} calculation completed')
print(f' {working_dir / (basename + ".out")}')
return output
def _load_output(self, basename: str, working_dir: Path):
"""Load and parse an existing ORCA output from disk.
Parameters
----------
basename : str
Basename of the ORCA job.
working_dir : Path
Directory containing the ORCA output files.
Returns
-------
Output
Parsed OPI Output object.
Raises
------
FileNotFoundError
If the output file does not exist.
"""
from opi.output.core import Output
out_path = working_dir / f'{basename}.out'
if not out_path.exists():
raise FileNotFoundError(
f'{out_path} not found. Run run_orca() first or check the file path.'
)
output = Output(basename=basename, working_dir=working_dir, version_check=False)
output.parse()
return output
def _build_ao_labels_from_orca(self, output) -> None:
"""Populate atom_index, atom_symbol, and atom_orbital from ORCA orbital labels.
Parameters
----------
output : Output
Parsed OPI Output object for the dimer.
"""
labels = output._safe_get(
'results_gbw', 0, 'molecule', 'molecularorbitals', 'orbitallabels'
)
if labels is None:
return
# Each label string is formatted as "{atom_0idx}{element} {orbital_type}"
# e.g. "0C 1s", "0C 2s", "0C 1pz", "12H 1s"
# atom_0idx is 0-indexed; element follows immediately with no space.
pattern = re.compile(r'^(\d+)([A-Za-z]+)\s+(.+)$')
self.atom_index = []
self.atom_symbol = []
self.atom_orbital = []
for label in labels:
m = pattern.match(label)
if m:
atom_0idx = int(m.group(1))
elem = m.group(2)
orb = m.group(3).strip()
self.atom_index.append(atom_0idx) # already 0-indexed
self.atom_symbol.append(elem)
self.atom_orbital.append(orb)