# -*- coding: utf-8 -*-
# BioSTEAM: The Biorefinery Simulation and Techno-Economic Analysis Modules
# Copyright (C) 2020, Yoel Cortes-Pena <yoelcortes@gmail.com>
#
# A significant portion of this module originates from:
# Chemical Engineering Design Library (ChEDL). Utilities for process modeling.
# Copyright (C) 2020 Caleb Bell <Caleb.Andrew.Bell@gmail.com>
#
# This module is under a dual license:
# 1. The UIUC open-source license. See
# github.com/BioSTEAMDevelopmentGroup/biosteam/blob/master/LICENSE.txt
# for license details.
#
# 2. The MIT open-source license. See
# https://github.com/CalebBell/thermo/blob/master/LICENSE.txt for details.
"""
This module includes elemental data taken from [1]_ and [2]_, and
functions to calculate molecular properties from elemental data.
References
----------
.. [1] N M O'Boyle, M Banck, C A James, C Morley, T Vandermeersch, and
G R Hutchison. "Open Babel: An open chemical toolbox." J. Cheminf.
(2011), 3, 33. DOI:10.1186/1758-2946-3-33
.. [2] RDKit: Open-source cheminformatics; http://www.rdkit.org
.. [3] Laštovka, Václav, Nasser Sallamie, and John M. Shaw. "A Similarity
Variable for Estimating the Heat Capacity of Solid Organic Compounds:
Part I. Fundamentals." Fluid Phase Equilibria 268, no. 1-2
(June 25, 2008): 51-60. doi:10.1016/j.fluid.2008.03.019.
.. [4] Hill, Edwin A. "ON A SYSTEM OF INDEXING CHEMICAL LITERATURE;
ADOPTED BY THE CLASSIFICATION DIVISION OF THE U. S. PATENT OFFICE.1."
Journal of the American Chemical Society 22, no. 8 (August 1, 1900):
478-94. doi:10.1021/ja02046a005.
"""
# Names exported by a star-import of this module.
__all__ = (
    'PeriodicTable',
    'compute_molecular_weight',
    'compute_mass_fractions',
    'compute_atom_fractions',
    'compute_similarity_variable',
    'atoms_to_Hill',
    'parse_simple_formula',
    'parse_nested_formula',
    'homonuclear_elemental_gases',
    'charge_from_formula',
    'serialize_formula',
    'atoms_to_array',
)
import numpy as np
import os
import re
import string
from .readers import load_json
# Short aliases for the path helpers used throughout this module.
fpath = os.path
join = os.path.join
# 'Data' directory located next to this source file.
parent_path = os.path.join(os.path.dirname(__file__), 'Data')
# 'Misc' sub-directory of the data directory.
folder = os.path.join(parent_path, 'Misc')
# %% Element data
# Big problem: Atoms like N2, O2 point to only the singlet
# Presumably atomic numbers of the elements treated as homonuclear gases
# (35 and 53 are noted as candidates but not included) -- TODO confirm.
homonuclear_elemental_gases = frozenset({1, 7, 8, 9, 17})  # 35, 53
# CAS numbers associated with the singlet forms of those elements
# (presumably one per entry above -- TODO confirm).
homonuclear_elemental_singlets_CASs = frozenset({
    "12385-13-6",
    "17778-88-0",
    "17778-80-2",
    "14762-94-8",
    "22537-15-1",
})
# %% Core
class PeriodicTable:
    """
    Periodic Table object for use in dealing with elements.

    Elements can be looked up by atomic number (as a string), symbol,
    name (as given or lower-cased) or CAS number.

    Parameters
    ----------
    elements : Iterable[Element]
        Element objects to index. Each must expose `number`, `symbol`,
        `name` and `CAS` attributes.

    Notes
    -----
    The length is the number of elements supplied (118 for a complete
    table).

    See Also
    --------
    periodic_table
    Element

    """
    __slots__ = ('elements', 'numerical_index', 'symbol_index',
                 'name_index', 'CAS_index', 'indexes')

    def __init__(self, elements):
        self.elements = elements = tuple(elements)
        self.numerical_index = numerical_index = {}
        self.symbol_index = symbol_index = {}
        self.name_index = name_index = {}
        self.CAS_index = CAS_index = {}
        # Lookup order used by `__contains__` and `__getitem__`.
        self.indexes = (symbol_index, numerical_index,
                        name_index, CAS_index)
        for e in elements:
            # Atomic numbers are indexed by their string form, e.g. '1'.
            numerical_index[str(e.number)] = e
            symbol_index[e.symbol] = e
            # Names are reachable both as given and lower-cased.
            name_index[e.name] = e
            name_index[e.name.lower()] = e
            CAS_index[e.CAS] = e

    def __contains__(self, key):
        """Return whether `key` is present in any of the indexes."""
        return any(key in index for index in self.indexes)

    def __len__(self):
        """Return the number of elements stored in the table."""
        return len(self.elements)

    def __iter__(self):
        """Iterate over the elements in the order they were supplied."""
        return iter(self.elements)

    def __getitem__(self, key):
        """
        Return the element registered under `key`.

        Raises
        ------
        KeyError
            If `key` is not present in any index.

        """
        for index in self.indexes:
            if key in index:
                return index[key]
        # Follow the mapping protocol instead of silently returning None.
        raise KeyError(key)
class Element:
    """
    Container for data on a single chemical element.

    Values are stored exactly as passed to the constructor.

    Attributes
    ----------
    number : int
        Atomic number
    name : str
        Name
    symbol : str
        Elemental symbol
    MW : float
        Molecular weight
    CAS : str
        CAS number
    period : str
        Period in the periodic table
    group : str
        Group in the periodic table
    block : str
        Block in the periodic table
    AReneg : float
        Allred and Rochow electronegativity
    rcov : float
        Covalent radius, [Angstrom]
    rvdw : float
        Van der Waals radius, [Angstrom]
    maxbonds : float
        Maximum valence of a bond with this element
    elneg : float
        Pauling electronegativity
    ionization : float
        Ionization potential, [eV]
    elaffinity : float
        Electron affinity, [eV]
    protons : int
        Number of protons (same as `number`)
    electrons : int
        Number of electrons of the element in the ground state
        (same as `number`)
    InChI : str
        Element symbol; the 'InChI=1S/' prefix is not prepended
    InChI_key : str
        25-character hash of the compound's InChI.
    smiles : str
        Element symbol wrapped in square brackets
    PubChem : int
        PubChem Compound identifier (CID) of the chemical

    """
    __slots__ = ('number', 'symbol', 'name', 'CAS', 'MW', 'AReneg', 'rcov',
                 'rvdw', 'maxbonds', 'elneg', 'ionization', 'elaffinity',
                 'period', 'group', 'block', 'InChI_key', 'PubChem')

    def __init__(self, name, number, symbol, MW, CAS, AReneg, rcov, rvdw,
                 maxbonds, elneg, ionization, elaffinity, period, group, block,
                 PubChem, InChI_key):
        # Identity
        self.number = number
        self.symbol = symbol
        self.name = name
        self.CAS = CAS
        self.MW = MW
        # Electronic and geometric properties
        self.AReneg = AReneg
        self.rcov = rcov
        self.rvdw = rvdw
        self.maxbonds = maxbonds
        self.elneg = elneg
        self.ionization = ionization
        self.elaffinity = elaffinity
        # Position in the periodic table
        self.period = period
        self.group = group
        self.block = block
        # External identifiers
        self.InChI_key = InChI_key
        self.PubChem = PubChem

    @property
    def protons(self):
        """Number of protons; identical to the atomic number."""
        return self.number

    @property
    def electrons(self):
        """Number of electrons; identical to the atomic number."""
        return self.number

    @property
    def InChI(self):
        """Element symbol, returned without an 'InChI=1S/' prefix."""
        return self.symbol

    @property
    def smiles(self):
        """Element symbol enclosed in square brackets."""
        return ''.join(('[', self.symbol, ']'))

    def __repr__(self):
        return "<{}: {}>".format(type(self).__name__, self.name)
#: Single instance of the PeriodicTable class, built from the entries of
#: 'elements.json'; each key is passed as the element name and its value
#: supplies the remaining Element keyword arguments.
periodic_table = PeriodicTable(
    Element(name, **data)
    for name, data in load_json(folder, 'elements.json').items()
)
def compute_molecular_weight(atoms):
    r"""
    Return the molecular weight of a molecule given a dictionary of its
    atoms and their counts, in the format {symbol: count}.

    .. math::
        MW = \sum_i n_i MW_i

    Parameters
    ----------
    atoms : dict
        Dictionary of counts of individual atoms, indexed by symbol with
        proper capitalization, [-]

    Returns
    -------
    MW : float
        Calculated molecular weight [g/mol]

    Raises
    ------
    ValueError
        If a key is neither found in `periodic_table` nor one of the
        isotope symbols 'D' and 'T'.

    Notes
    -----
    Keys are looked up in `periodic_table` first; 'D' (deuterium) and
    'T' (tritium) fall back to hard-coded molecular weights.

    Examples
    --------
    >>> compute_molecular_weight({'H': 12, 'C': 20, 'O': 5})
    332.30628

    """
    # Hardcoded MW until an actual isotope db is created
    isotope_MWs = {'D': 2.014102, 'T': 3.0160492}
    total = 0
    for symbol, count in atoms.items():
        if symbol in periodic_table:
            atomic_MW = periodic_table[symbol].MW
        elif symbol in isotope_MWs:
            atomic_MW = isotope_MWs[symbol]
        else:
            raise ValueError(f'molecule includes unknown atom {repr(symbol)}')
        total += atomic_MW*count
    return total
def compute_mass_fractions(atoms, MW=None):
    r"""
    Return the mass fractions of each element in a compound,
    given a dictionary of its atoms and their counts, in the format
    {symbol: count}.

    .. math::
        w_i = \frac{n_i MW_i}{\sum_i n_i MW_i}

    Parameters
    ----------
    atoms : dict
        Dictionary of counts of individual atoms, indexed by symbol with
        proper capitalization, [-]
    MW : float, optional
        Molecular weight, [g/mol]. Computed with
        `compute_molecular_weight` when omitted (or falsy).

    Returns
    -------
    mfracs : dict
        Dictionary of mass fractions of individual atoms, indexed by symbol
        with proper capitalization, [-]

    Raises
    ------
    ValueError
        If a key of `atoms` is not found in `periodic_table`; no isotope
        fallback is applied here.

    Notes
    -----
    Supplying the molecular weight skips its recalculation.

    Examples
    --------
    >>> compute_mass_fractions({'H': 12, 'C': 20, 'O': 5})
    {'H': 0.03639798802478244, 'C': 0.7228692758981262, 'O': 0.24073273607709128}

    """
    if not MW:
        MW = compute_molecular_weight(atoms)
    mfracs = {}
    for symbol, count in atoms.items():
        if symbol in periodic_table:
            mfracs[symbol] = periodic_table[symbol].MW*count/MW
        else:
            # f-string so the offending symbol actually appears in the message.
            raise ValueError(f'invalid atom {symbol!r}')
    return mfracs
def compute_atom_fractions(atoms):
    r"""
    Return the atomic fraction of each element in a compound, given a
    dictionary of its atoms and their counts, in the format
    {symbol: count}.

    .. math::
        a_i = \frac{n_i}{\sum_i n_i}

    Parameters
    ----------
    atoms : dict
        Dictionary of counts of individual atoms, indexed by symbol with
        proper capitalization, [-]

    Returns
    -------
    afracs : dict
        Dictionary of atomic fractions of individual atoms, indexed by
        symbol with proper capitalization, [-]

    Notes
    -----
    Only the counts are used; symbols are never validated, so incorrect
    or custom symbols do not raise an error.

    Examples
    --------
    >>> compute_atom_fractions({'H': 12, 'C': 20, 'O': 5})
    {'H': 0.32432432432432434, 'C': 0.5405405405405406, 'O': 0.13513513513513514}

    """
    total = sum(atoms.values())
    return {symbol: n/total for symbol, n in atoms.items()}
def compute_similarity_variable(atoms, MW=None):
    r"""
    Return the similarity variable of a compound, as defined in [3]_:
    the total number of atoms divided by the molecular weight.

    .. math::
        \alpha = \frac{N}{MW} = \frac{\sum_i n_i}{\sum_i n_i MW_i}

    Parameters
    ----------
    atoms : dict
        Dictionary of counts of individual atoms, indexed by symbol with
        proper capitalization, [-]
    MW : float, optional
        Molecular weight, [g/mol]. Computed with
        `compute_molecular_weight` when omitted (or falsy).

    Returns
    -------
    compute_similarity_variable : float
        Similarity variable as defined in [3]_, [mol/g]

    Notes
    -----
    Supplying the molecular weight skips its recalculation.

    Examples
    --------
    >>> compute_similarity_variable({'H': 32, 'C': 15})
    0.2212654140784498

    """
    n_atoms = sum(atoms.values())
    weight = MW if MW else compute_molecular_weight(atoms)
    return n_atoms/weight
def atoms_to_Hill(atoms):
    r"""
    Determine the Hill formula of a compound as in [4]_, given a dictionary
    of its atoms and their counts, in the format {symbol: count}.

    Parameters
    ----------
    atoms : dict
        Dictionary of counts of individual atoms, indexed by symbol with
        proper capitalization, [-]

    Returns
    -------
    Hill_formula : str
        Hill formula, [-]

    Notes
    -----
    The Hill system is as follows:

    If the chemical has 'C' in it, this is listed first, and if it also
    has 'H', that goes next. All remaining elements follow in alphabetical
    order; when 'C' is absent, 'H' is sorted alphabetically with the rest.

    Each element is followed by its count, unless the count is 1.

    The input dictionary is not modified.

    Examples
    --------
    >>> atoms_to_Hill({'H': 5, 'C': 2, 'Br': 1})
    'C2H5Br'

    """
    # Work on a copy so the caller's dictionary is left untouched.
    remaining = atoms.copy()
    ordered = []
    if 'C' in remaining:
        ordered.append('C')
        del remaining['C']
        # Hydrogen is only promoted when carbon is present.
        if 'H' in remaining:
            ordered.append('H')
            del remaining['H']
    ordered.extend(sorted(remaining))
    pieces = []
    for symbol in ordered:
        n = atoms[symbol]
        pieces.append(symbol if n == 1 else symbol + str(n))
    return ''.join(pieces)
# All patterns are raw strings so that regex escapes such as `\d` and `\(`
# are not parsed as (invalid) Python string escapes.

# An element symbol (capital letter plus up to two lower-case letters)
# optionally followed by a run of digits and dots.
_formula_parser = re.compile(r'([A-Z][a-z]{0,2})([\d\.\d]+)?')
# Tokens of a formula: element symbol, number (optionally with a decimal
# point), or a parenthesis.
formula_token_matcher_rational = re.compile(r'[A-Z][a-z]?|(?:\d*[.])?\d+|\d+|[()]')
# Every ASCII letter, upper and lower case.
letter_set = set(string.ascii_letters)
# A trailing parenthesized charge such as '(+2)', '(2-)' or '(++)'.
bracketed_charge_re = re.compile(r'\([+-]?\d+\)$|\(\d+[+-]?\)$|\([+-]+\)$')
def atoms_to_array(atoms: dict) -> np.ndarray:
    """
    Return a length-118 array of atom counts.

    The count of each symbol in `atoms` is stored at position
    ``number - 1`` of the matching element in `periodic_table`; all other
    entries are zero.

    Parameters
    ----------
    atoms : dict
        Dictionary of counts of individual atoms, indexed by symbol.

    Returns
    -------
    array : np.ndarray
        Counts positioned by element number.

    Raises
    ------
    KeyError
        If a symbol is not in the periodic table's symbol index.

    """
    by_symbol = periodic_table.symbol_index
    counts = np.zeros(118)
    for symbol in atoms:
        position = by_symbol[symbol].number - 1
        counts[position] = atoms[symbol]
    return counts
def array_to_atoms(array: np.ndarray) -> dict:
    """
    Return a {symbol: value} dictionary of the nonzero entries of `array`.

    Each nonzero position ``i`` is mapped to the symbol of
    ``periodic_table.elements[i]``.

    Parameters
    ----------
    array : np.ndarray
        One-dimensional array of atom counts.
        NOTE(review): assumes `periodic_table.elements` is ordered so that
        position ``i`` holds the element this array uses index ``i`` for
        -- confirm against the element data.

    Returns
    -------
    atoms : dict
        Nonzero values of `array`, indexed by element symbol.

    """
    (nonzero,) = np.where(array != 0.)
    table = periodic_table.elements
    return {table[i].symbol: value
            for i, value in zip(nonzero, array[nonzero])}