Source code for cobra.core.Formula

#cobra.core.Formula.py
#######################
#BEGIN Class Formula
#
import re
from warnings import warn
from copy import deepcopy
from .Object import Object
class Formula(Object):
    """Hold the elemental composition and weight of a Metabolite formula.

    This will replace the current way of dealing with molecular formulae in
    cobra.Metabolite objects.

    Legal formula string characters include letters, numbers, and *.
    If a formula string starts with a number then it is assumed that all
    element counts are multiplied by this number.

    Attributes assigned by this class:
        formula: the formula string given to the constructor (or None).
        elements: dict mapping an element symbol to its count.
        weight: the formula weight, or None when it has not been or could
            not be calculated.

    """

    # Leading integer multiplier, e.g. the "2" in "2H2O".
    _starting_coefficient_re = re.compile(r'^\d+')
    # Any character that is neither a letter nor a digit.
    _non_empirical_re = re.compile(r'[^A-Za-z0-9]')
    # One element symbol (capital letter plus optional lowercase letters)
    # followed by an optional integer count.
    _element_token_re = re.compile(r'([A-Z][a-z]*)(\d*)')

    def __init__(self, formula=None):
        """Store *formula* and, when it is not None, parse it.

        formula: None or a String representation of a chemical formula.
            It is also passed to Object.__init__.

        """
        Object.__init__(self, formula)
        self.formula = formula
        self.weight = None
        self.elements = {}
        if self.formula is not None:
            self.parse_composition()

    def __add__(self, other_formula):
        """Combine two molecular formulas.

        other_formula: cobra.Formula or String representation of a chemical
        Formula.

        Returns a new Formula whose element counts are the sums of the
        counts of both operands.  Neither operand is modified.

        """
        if isinstance(other_formula, str):
            other_formula = Formula(other_formula)
        elements = deepcopy(self.elements)
        for element, element_count in other_formula.elements.items():
            elements[element] = elements.get(element, 0) + element_count
        new_formula = Formula()
        # str.join (unlike reduce without an initial value) also copes with
        # the case where neither operand contains any element.
        new_formula.id = new_formula.formula = ''.join(
            '%s%i' % (k, v) for k, v in elements.items())
        new_formula.elements = elements
        new_formula.calculate_formula_weight()
        return new_formula

    def parse_composition(self):
        """Breaks the chemical formula down by element.

        Useful for making sure Reactions are balanced.

        Recomputes self.elements and self.weight from self.formula.  Both
        are reset first, so a formula that is empty, None or unparseable
        leaves elements == {} and weight == None rather than stale values.
        Problems are reported with warnings.warn; nothing is raised.

        Supported forms, checked in this order:
          * parts joined by '*' (e.g. Na2WO4*2H2O): each part is parsed on
            its own and the parts are added together;
          * a leading integer (e.g. 2H2O): every count of the remaining
            formula is multiplied by that integer;
          * a plain run of element symbols with optional integer counts
            (e.g. C6H12O6, NaCl, CH3CH2OH).  A symbol that occurs more
            than once has its counts summed.

        TODO: Find a stable python package for parsing chemical formulas.

        """
        self.elements = {}
        self.weight = None
        formula = self.formula
        if not formula:
            return
        try:
            coefficient_match = self._starting_coefficient_re.match(formula)
            if '*' in formula:
                parts = [Formula(part) for part in formula.split('*')]
                combined = parts[0]
                for part in parts[1:]:
                    combined = combined + part
                self.elements = combined.elements
            elif coefficient_match:
                # This is often the case for some salt hydrates
                # (e.g. Na2WO4*2H2O)
                coefficient = int(coefficient_match.group())
                remainder = formula[coefficient_match.end():]
                if not remainder:
                    raise ValueError('formula holds only a coefficient')
                base = Formula(remainder)
                self.elements = dict((k, coefficient * v)
                                     for k, v in base.elements.items())
            elif self._non_empirical_re.search(formula):
                warn('The formula %s is not in empirical format so it will not be parsed' % formula)
                return
            else:
                self.elements = self._parse_empirical(formula)
            self.calculate_formula_weight()
        except Exception:
            # Parsing is best effort: report the problem and leave the
            # composition empty instead of propagating the error.
            self.elements = {}
            self.weight = None
            warn('Unable to parse your formula: %s. Perhaps it is not' % self.formula +
                 ' empirical? Perhaps the composition has floats?')

    @classmethod
    def _parse_empirical(cls, formula):
        """Return {symbol: count} for a formula made only of element tokens.

        formula: String consisting of element symbols, each optionally
        followed by an integer count (a missing count means 1).

        Counts are returned as floats.  Raises ValueError when some part of
        the string is not covered by an element token.

        """
        elements = {}
        position = 0
        for token in cls._element_token_re.finditer(formula):
            if token.start() != position:
                # Characters were skipped, so the string is not a clean
                # sequence of element tokens.
                break
            symbol, count = token.groups()
            elements[symbol] = (elements.get(symbol, 0.0) +
                                (float(count) if count else 1.0))
            position = token.end()
        if position != len(formula):
            raise ValueError('cannot parse %r at position %i'
                             % (formula, position))
        return elements

    def calculate_formula_weight(self, weight_dict=None):
        """Calculate the formula weight.

        weight_dict: None or a dictionary of elements and weights.  When
        None, the module level elements_and_molecular_weights is used.

        The result is stored in self.weight.  If an element of
        self.elements is missing from weight_dict, self.weight is set to
        None and a warning is issued.

        """
        if weight_dict is None:
            weight_dict = elements_and_molecular_weights
        try:
            self.weight = sum(the_count * weight_dict[the_element]
                              for the_element, the_count
                              in self.elements.items())
        except KeyError as e:
            self.weight = None
            warn('The element %s is not in the weight_dict'%e)
#
#END Class Formula
########################
# Lookup table: element symbol -> weight of one atom of that element.
# Formula.calculate_formula_weight uses it as the default weight_dict.
# NOTE(review): units are not stated in this file; presumably g/mol - confirm.
# Entries are listed in order of atomic number, grouped by period.
elements_and_molecular_weights = {
    # Period 1
    'H': 1.007940,
    'He': 4.002602,
    # Period 2
    'Li': 6.941000,
    'Be': 9.012182,
    'B': 10.811000,
    'C': 12.010700,
    'N': 14.006700,
    'O': 15.999400,
    'F': 18.998403,
    'Ne': 20.179700,
    # Period 3
    'Na': 22.989770,
    'Mg': 24.305000,
    'Al': 26.981538,
    'Si': 28.085500,
    'P': 30.973761,
    'S': 32.065000,
    'Cl': 35.453000,
    'Ar': 39.948000,
    # Period 4
    'K': 39.098300,
    'Ca': 40.078000,
    'Sc': 44.955910,
    'Ti': 47.867000,
    'V': 50.941500,
    'Cr': 51.996100,
    'Mn': 54.938049,
    'Fe': 55.845000,
    'Co': 58.933200,
    'Ni': 58.693400,
    'Cu': 63.546000,
    'Zn': 65.409000,
    'Ga': 69.723000,
    'Ge': 72.640000,
    'As': 74.921600,
    'Se': 78.960000,
    'Br': 79.904000,
    'Kr': 83.798000,
    # Period 5
    'Rb': 85.467800,
    'Sr': 87.620000,
    'Y': 88.905850,
    'Zr': 91.224000,
    'Nb': 92.906380,
    'Mo': 95.940000,
    'Tc': 98.000000,
    'Ru': 101.070000,
    'Rh': 102.905500,
    'Pd': 106.420000,
    'Ag': 107.868200,
    'Cd': 112.411000,
    'In': 114.818000,
    'Sn': 118.710000,
    'Sb': 121.760000,
    'Te': 127.600000,
    'I': 126.904470,
    'Xe': 131.293000,
    # Period 6
    'Cs': 132.905450,
    'Ba': 137.327000,
    'La': 138.905500,
    'Ce': 140.116000,
    'Pr': 140.907650,
    'Nd': 144.240000,
    'Pm': 145.000000,
    'Sm': 150.360000,
    'Eu': 151.964000,
    'Gd': 157.250000,
    'Tb': 158.925340,
    'Dy': 162.500000,
    'Ho': 164.930320,
    'Er': 167.259000,
    'Tm': 168.934210,
    'Yb': 173.040000,
    'Lu': 174.967000,
    'Hf': 178.490000,
    'Ta': 180.947900,
    'W': 183.840000,
    'Re': 186.207000,
    'Os': 190.230000,
    'Ir': 192.217000,
    'Pt': 195.078000,
    'Au': 196.966550,
    'Hg': 200.590000,
    'Tl': 204.383300,
    'Pb': 207.200000,
    'Bi': 208.980380,
    'Po': 209.000000,
    'At': 210.000000,
    'Rn': 222.000000,
    # Period 7
    'Fr': 223.000000,
    'Ra': 226.000000,
    'Ac': 227.000000,
    'Th': 232.038100,
    'Pa': 231.035880,
    'U': 238.028910,
    'Np': 237.000000,
    'Pu': 244.000000,
    'Am': 243.000000,
    'Cm': 247.000000,
    'Bk': 247.000000,
    'Cf': 251.000000,
    'Es': 252.000000,
    'Fm': 257.000000,
    'Md': 258.000000,
    'No': 259.000000,
    'Lr': 262.000000,
    'Rf': 261.000000,
    'Db': 262.000000,
    'Sg': 266.000000,
    'Bh': 264.000000,
    'Hs': 277.000000,
    'Mt': 268.000000,
    'Ds': 281.000000,
    'Rg': 272.000000,
    'Cn': 285.000000,
    # Systematic placeholder symbols for elements 114 and 116.
    'Uuq': 289.000000,
    'Uuh': 292.000000,
}