#cobra.core.Formula.py
#######################
#BEGIN Class Formula
#
import re
from warnings import warn
from copy import deepcopy
from .Object import Object
class Formula(Object):
    """Hold the elemental composition and weight of a Metabolite formula.

    This will replace the current way of dealing with molecular
    formulae in cobra.Metabolite objects.

    Legal formula string characters include letters, numbers, and *.
    If a formula string starts with a number then it is assumed that
    all element counts are multiplied by this number.

    Attributes assigned by this class:
        formula: the formula string passed to the constructor (or None).
        elements: dict mapping element symbol to its count.
        weight: the formula weight, or None when it has not been (or could
            not be) calculated.

    """

    # Leading integer multiplier, e.g. the "2" in "2H2O".
    _starting_coefficient_re = re.compile(r'^\d+')
    # A run of digits, i.e. one element count.
    _element_number_re = re.compile(r'\d+')
    # One element symbol: a capital letter followed by lowercase letters.
    _element_re = re.compile(r'[A-Z][a-z]*')
    # Any character that is not allowed in an empirical formula.
    _non_empirical_re = re.compile(r'[^A-Za-z0-9]')

    def __init__(self, formula=None):
        """Store the formula string and parse it when one is given.

        formula: None or a String representation of a chemical formula.

        """
        Object.__init__(self, formula)
        self.formula = formula
        self.weight = None
        self.elements = {}
        if self.formula is not None:
            self.parse_composition()

    def __add__(self, other_formula):
        """Combine two molecular formulas.

        other_formula: cobra.Formula or String representation of a
        chemical Formula.

        Returns a new Formula whose element counts are the sums of the
        counts of both operands; neither operand is modified.

        """
        if isinstance(other_formula, str):
            other_formula = Formula(other_formula)
        # Counts are plain numbers, so a shallow copy is sufficient.
        elements = dict(self.elements)
        for element, element_count in other_formula.elements.items():
            elements[element] = elements.get(element, 0) + element_count
        new_formula = Formula()
        # str.join also copes with an empty composition, where reduce()
        # without an initial value would raise TypeError.
        new_formula.id = new_formula.formula = ''.join(
            '%s%i' % (element, element_count)
            for element, element_count in elements.items())
        new_formula.elements = elements
        new_formula.calculate_formula_weight()
        return new_formula

    def parse_composition(self):
        """Break the chemical formula down by element.

        Useful for making sure Reactions are balanced.

        Fills self.elements and, for parseable formulas, self.weight.
        Nothing is raised: a formula containing characters other than
        letters, digits and '*' or one that cannot be parsed only
        triggers a warning.

        TODO: Find a stable python package for parsing chemical formulas.

        """
        if self.formula is None:
            self.elements = {}
            return
        try:
            formula = self.formula
            coefficient_match = self._starting_coefficient_re.match(formula)
            if '*' in formula:
                # Adducts / hydrates: parse every part on its own and
                # add the compositions together.
                parts = [Formula(part) for part in formula.split('*')]
                combined = parts[0]
                for part in parts[1:]:
                    combined = combined + part
                self.elements = combined.elements
            elif coefficient_match:
                # The formula starts with an integer, so multiply all
                # element counts by it.  This is often the case for some
                # salt hydrates (e.g. Na2WO4*2H2O).
                coefficient = int(coefficient_match.group())
                remainder = Formula(formula[coefficient_match.end():])
                self.elements = dict(
                    (element, coefficient * element_count)
                    for element, element_count
                    in remainder.elements.items())
            elif self._non_empirical_re.search(formula):
                self.elements = {}
                warn('The formula %s is not in empirical format so it will not be parsed'%self.formula)
                # The weight is deliberately left untouched here.
                return
            else:
                self.elements = self._parse_empirical(formula)
            self.calculate_formula_weight()
        except Exception:
            warn('Unable to parse your formula: %s. Perhaps it is not'%self.formula +\
                 ' empirical? Perhaps the composition has floats?')

    def _parse_empirical(self, formula):
        """Return {symbol: count} for a formula of only letters and digits.

        Raises ValueError when a stretch of letters holds no element
        symbol; parse_composition turns that into a warning.

        """
        # Splitting on the numbers leaves the stretches of letters; every
        # number belongs to the last symbol of the stretch preceding it.
        pieces = self._element_number_re.split(formula)
        if pieces[-1] == '':
            # Remove the empty piece generated by the last item in the
            # formula string being a number.
            pieces.pop()
        counts = [float(number)
                  for number in self._element_number_re.findall(formula)]
        if len(counts) > len(pieces):
            # Bail out instead of padding: padding could never make the
            # two lengths meet.
            raise ValueError(
                'Have more element counts than elements in formula: ' +
                formula)
        # A formula that does not end in a number has a final stretch of
        # letters without a count; it gets a count of 1.
        counts.extend([1.0] * (len(pieces) - len(counts)))
        composition = {}
        for piece, piece_count in zip(pieces, counts):
            symbols = self._element_re.findall(piece)
            if not symbols:
                raise ValueError('No element symbol found in %r' % piece)
            # Symbols that weren't separated by a number have a count of
            # 1; only the last symbol carries the number that follows.
            # Counts are accumulated so that a symbol occurring more than
            # once (e.g. CH3CH2OH) is summed rather than overwritten.
            for symbol in symbols[:-1]:
                composition[symbol] = composition.get(symbol, 0.0) + 1.0
            last_symbol = symbols[-1]
            composition[last_symbol] = (composition.get(last_symbol, 0.0) +
                                        piece_count)
        return composition

    def calculate_formula_weight(self, weight_dict=None):
        """Calculate the formula weight and store it in self.weight.

        weight_dict: None or a dictionary of elements and weights.  When
        None, the module level elements_and_molecular_weights is used.

        If an element is missing from weight_dict a warning is issued and
        self.weight is set to None.

        """
        if weight_dict is None:
            weight_dict = elements_and_molecular_weights
        try:
            self.weight = sum(the_count * weight_dict[the_element]
                              for the_element, the_count
                              in self.elements.items())
        except KeyError as e:
            self.weight = None
            warn('The element %s is not in the weight_dict'%e)
#
#END Class Formula
########################
# Lookup table: element symbol -> weight.  Formula.calculate_formula_weight
# falls back to this table when it is called without a weight_dict.
# NOTE(review): 'Uuq' and 'Uuh' look like the former placeholder names of
# elements 114 and 116 -- confirm whether newer symbols should be added too.
elements_and_molecular_weights = {
    'H': 1.007940,
    'He': 4.002602,
    'Li': 6.941000,
    'Be': 9.012182,
    'B': 10.811000,
    'C': 12.010700,
    'N': 14.006700,
    'O': 15.999400,
    'F': 18.998403,
    'Ne': 20.179700,
    'Na': 22.989770,
    'Mg': 24.305000,
    'Al': 26.981538,
    'Si': 28.085500,
    'P': 30.973761,
    'S': 32.065000,
    'Cl': 35.453000,
    'Ar': 39.948000,
    'K': 39.098300,
    'Ca': 40.078000,
    'Sc': 44.955910,
    'Ti': 47.867000,
    'V': 50.941500,
    'Cr': 51.996100,
    'Mn': 54.938049,
    'Fe': 55.845000,
    'Co': 58.933200,
    'Ni': 58.693400,
    'Cu': 63.546000,
    'Zn': 65.409000,
    'Ga': 69.723000,
    'Ge': 72.640000,
    'As': 74.921600,
    'Se': 78.960000,
    'Br': 79.904000,
    'Kr': 83.798000,
    'Rb': 85.467800,
    'Sr': 87.620000,
    'Y': 88.905850,
    'Zr': 91.224000,
    'Nb': 92.906380,
    'Mo': 95.940000,
    'Tc': 98.000000,
    'Ru': 101.070000,
    'Rh': 102.905500,
    'Pd': 106.420000,
    'Ag': 107.868200,
    'Cd': 112.411000,
    'In': 114.818000,
    'Sn': 118.710000,
    'Sb': 121.760000,
    'Te': 127.600000,
    'I': 126.904470,
    'Xe': 131.293000,
    'Cs': 132.905450,
    'Ba': 137.327000,
    'La': 138.905500,
    'Ce': 140.116000,
    'Pr': 140.907650,
    'Nd': 144.240000,
    'Pm': 145.000000,
    'Sm': 150.360000,
    'Eu': 151.964000,
    'Gd': 157.250000,
    'Tb': 158.925340,
    'Dy': 162.500000,
    'Ho': 164.930320,
    'Er': 167.259000,
    'Tm': 168.934210,
    'Yb': 173.040000,
    'Lu': 174.967000,
    'Hf': 178.490000,
    'Ta': 180.947900,
    'W': 183.840000,
    'Re': 186.207000,
    'Os': 190.230000,
    'Ir': 192.217000,
    'Pt': 195.078000,
    'Au': 196.966550,
    'Hg': 200.590000,
    'Tl': 204.383300,
    'Pb': 207.200000,
    'Bi': 208.980380,
    'Po': 209.000000,
    'At': 210.000000,
    'Rn': 222.000000,
    'Fr': 223.000000,
    'Ra': 226.000000,
    'Ac': 227.000000,
    'Th': 232.038100,
    'Pa': 231.035880,
    'U': 238.028910,
    'Np': 237.000000,
    'Pu': 244.000000,
    'Am': 243.000000,
    'Cm': 247.000000,
    'Bk': 247.000000,
    'Cf': 251.000000,
    'Es': 252.000000,
    'Fm': 257.000000,
    'Md': 258.000000,
    'No': 259.000000,
    'Lr': 262.000000,
    'Rf': 261.000000,
    'Db': 262.000000,
    'Sg': 266.000000,
    'Bh': 264.000000,
    'Hs': 277.000000,
    'Mt': 268.000000,
    'Ds': 281.000000,
    'Rg': 272.000000,
    'Cn': 285.000000,
    'Uuq': 289.000000,
    'Uuh': 292.000000,
}