#cobra/sbml.py: Tools for reading / writing SBML now contained in
#this module
#System modules
from .. import Model, Reaction, Metabolite, Formula
from os.path import isfile
from os import name as __name
from copy import deepcopy
from time import time
import re
#
if __name == 'java':
from org.sbml.jsbml import SBMLDocument, SpeciesReference, KineticLaw, Parameter
from org.sbml.jsbml import SBMLReader, SBMLWriter
__tmp_reader = SBMLReader()
__tmp_writer = SBMLWriter()
readSBML = __tmp_reader.readSBMLFromFile
writeSBML = __tmp_writer.writeSBMLToFile
from org.sbml.jsbml.Unit import Kind as __Kind
UNIT_KIND_MOLE = __Kind.MOLE
UNIT_KIND_GRAM = __Kind.GRAM
UNIT_KIND_SECOND = __Kind.SECOND
UNIT_KIND_DIMENSIONLESS = __Kind.DIMENSIONLESS
else:
from libsbml import SBMLDocument, SpeciesReference, KineticLaw, Parameter
from libsbml import readSBML, writeSBML
from libsbml import UNIT_KIND_MOLE, UNIT_KIND_GRAM, UNIT_KIND_SECOND, UNIT_KIND_DIMENSIONLESS
[docs]def parse_legacy_id(the_id, the_compartment=None, the_type='metabolite',
use_hyphens=False):
"""Deals with a bunch of problems due to bigg.ucsd.edu not following SBML standards
the_id: String.
the_compartment: String.
the_type: String. Currently only 'metabolite' is supported
use_hyphens: Boolean. If True, double underscores (__) in an SBML ID will be converted to hyphens
"""
if use_hyphens:
the_id = the_id.replace('__','-')
if the_type == 'metabolite':
if the_id.split('_')[-1] == the_compartment:
#Reformat Ids to match convention in Palsson Lab.
the_id = the_id[:-len(the_compartment)-1]
the_id += '[%s]'%the_compartment
return the_id
[docs]def create_cobra_model_from_sbml_file(sbml_filename, old_sbml=False, legacy_metabolite=False,
print_time=False, use_hyphens=False):
"""convert an SBML XML file into a cobra.Model object. Supports
SBML Level 2 Versions 1 and 4
sbml_filename: String.
old_sbml: Boolean. Set to True if the XML file has metabolite
formula appended to metabolite names. This was a poorly designed
artifact that persists in some models.
legacy_metabolite: Boolean. If True then assume that the metabolite id has
the compartment id appended after an underscore (e.g. _c for cytosol). This
has not been implemented but will be soon.
print_time: Boolean. Print the time requirements for different sections
use_hyphens: Boolean. If True, double underscores (__) in an SBML ID will be converted to hyphens
"""
# Ensure that the file exists
if not isfile(sbml_filename):
raise IOError('Your SBML file is not found: %s'%sbml_filename)
#Expressions to change SBML Ids to Palsson Lab Ids
metabolite_re = re.compile('^M_')
reaction_re = re.compile('^R_')
compartment_re = re.compile('^C_')
if print_time:
start_time = time()
model_doc = readSBML(sbml_filename)
if print_time:
print 'Loading %s took %1.2f seconds'%(sbml_filename,
time()-start_time)
sbml_model = model_doc.getModel()
sbml_model_id = sbml_model.getId()
sbml_species = sbml_model.getListOfSpecies()
sbml_reactions = sbml_model.getListOfReactions()
sbml_compartments = sbml_model.getListOfCompartments()
compartment_dict = dict([(compartment_re.split(x.getId())[-1], x.getName())
for x in sbml_compartments])
if legacy_metabolite:
#Deal with the palsson lab appending the compartment id to the metabolite id
new_dict = {}
for the_id, the_name in compartment_dict.items():
if the_name == '':
new_dict[the_id[0].lower()] = the_id
else:
new_dict[the_id] = the_name
compartment_dict = new_dict
legacy_compartment_converter = dict([(v,k)
for k, v in compartment_dict.items()])
if print_time:
start_time = time()
metabolite_dict = {}
#Convert sbml_metabolites to cobra.Metabolites
for sbml_metabolite in sbml_species:
#Skip sbml boundary species
if sbml_metabolite.getBoundaryCondition():
continue
if (old_sbml or legacy_metabolite) and \
sbml_metabolite.getId().endswith('_b'):
#Deal with incorrect sbml from bigg.ucsd.edu
continue
tmp_metabolite = Metabolite()
metabolite_id = tmp_metabolite.id = sbml_metabolite.getId()
tmp_metabolite.compartment = compartment_re.split(sbml_metabolite.getCompartment())[-1]
if legacy_metabolite:
if tmp_metabolite.compartment not in compartment_dict:
tmp_metabolite.compartment = legacy_compartment_converter[tmp_metabolite.compartment]
tmp_metabolite.id = parse_legacy_id(tmp_metabolite.id, tmp_metabolite.compartment,
use_hyphens=use_hyphens)
if use_hyphens:
tmp_metabolite.id = metabolite_re.split(tmp_metabolite.id)[-1].replace('__','-')
else:
#Just in case the SBML ids are ill-formed and use -
tmp_metabolite.id = metabolite_re.split(tmp_metabolite.id)[-1].replace('-','__')
tmp_metabolite.name = sbml_metabolite.getName()
tmp_formula = ''
tmp_metabolite.charge = sbml_metabolite.getCharge()
tmp_metabolite.notes = parse_legacy_sbml_notes(sbml_metabolite.getNotesString())
for the_key in tmp_metabolite.notes.keys():
if the_key.lower() == 'formula':
tmp_formula = tmp_metabolite.notes.pop(the_key)[0]
break
if tmp_formula == '' and old_sbml:
tmp_formula = tmp_metabolite.name.split('_')[-1]
tmp_metabolite.name = tmp_metabolite.name[:-len(tmp_formula)-1]
tmp_metabolite.formula = Formula(tmp_formula)
metabolite_dict.update({metabolite_id: tmp_metabolite})
if print_time:
print 'Parsing %s took %1.2f seconds'%('metabolites',
time()-start_time)
if print_time:
start_time = time()
#Construct the vectors and matrices for holding connectivity and numerical info
#to feed to the cobra toolbox.
#Always assume steady state simulations so b is set to 0
cobra_reaction_list = []
for sbml_reaction in sbml_reactions:
if use_hyphens:
#Change the ids to match conventions used by the Palsson lab.
reaction = Reaction(reaction_re.split(sbml_reaction.getId())[-1].replace('__','-'))
else:
#Just in case the SBML ids are ill-formed and use -
reaction = Reaction(reaction_re.split(sbml_reaction.getId())[-1].replace('-','__'))
cobra_reaction_list.append(reaction)
#reaction.exchange_reaction = 0
reaction.name = sbml_reaction.getName()
cobra_metabolites = {}
#Use the cobra.Metabolite class here
for sbml_metabolite in sbml_reaction.getListOfReactants():
tmp_metabolite_id = sbml_metabolite.getSpecies()
#This deals with boundary metabolites
if tmp_metabolite_id in metabolite_dict:
tmp_metabolite = deepcopy(metabolite_dict[tmp_metabolite_id])
cobra_metabolites[tmp_metabolite] = -sbml_metabolite.getStoichiometry()
else:
reaction.boundary = 'system_boundary'
for sbml_metabolite in sbml_reaction.getListOfProducts():
tmp_metabolite_id = sbml_metabolite.getSpecies()
#This deals with boundary metabolites
if tmp_metabolite_id in metabolite_dict:
tmp_metabolite = deepcopy(metabolite_dict[tmp_metabolite_id])
cobra_metabolites[tmp_metabolite] = sbml_metabolite.getStoichiometry()
else:
reaction.boundary = 'system_boundary'
#Parse the kinetic law info here.
parameter_dict = {}
# if isinstance(the_reaction.getKineticLaw(), NoneType):
if not sbml_reaction.getKineticLaw():
parameter_dict['lower_bound'] = -1000
parameter_dict['upper_bound'] = 1000
parameter_dict['objective_coefficient'] = 0
else:
for sbml_parameter in sbml_reaction.getKineticLaw().getListOfParameters():
parameter_dict[sbml_parameter.getId().lower()] = sbml_parameter.getValue()
if 'lower_bound' in parameter_dict:
the_key = 'lower_bound'
elif 'lower bound' in parameter_dict:
the_key = 'lower bound'
reaction.lower_bound = parameter_dict[the_key]
if 'upper_bound' in parameter_dict:
the_key = 'upper_bound'
elif 'upper bound' in parameter_dict:
the_key = 'upper bound'
reaction.upper_bound = parameter_dict[the_key]
if 'objective_coefficient' in parameter_dict:
the_key = 'objective_coefficient'
elif 'objective coefficient' in parameter_dict:
the_key = 'objective coefficient'
reaction.objective_coefficient = parameter_dict[the_key]
reaction_note_dict = parse_legacy_sbml_notes(sbml_reaction.getNotesString())
#Parse the reaction notes.
#POTENTIAL BUG: DEALING WITH LEGACY 'SBML' THAT IS NOT IN A
#STANDARD FORMAT
#TODO: READ IN OTHER NOTES AND GIVE THEM A reaction_ prefix.
#TODO: Make sure genes get added as objects
if reaction_note_dict.has_key('GENE ASSOCIATION'):
reaction.gene_reaction_rule = reaction_note_dict['GENE ASSOCIATION'][0]
reaction.parse_gene_association()
if reaction_note_dict.has_key('GENE LIST'):
reaction.systematic_names = reaction_note_dict['GENE LIST'][0]
elif reaction_note_dict.has_key('GENES') and \
reaction_note_dict['GENES'] != ['']:
reaction.systematic_names = reaction_note_dict['GENES'][0]
elif reaction_note_dict.has_key('LOCUS'):
gene_id_to_object = dict([(x.id, x) for x in reaction._genes])
for the_row in reaction_note_dict['LOCUS']:
tmp_row_dict = {}
the_row = 'LOCUS:' + the_row.lstrip('_').rstrip('#')
for the_item in the_row.split('#'):
try:
k, v = the_item.split(':')
tmp_row_dict[k] = v
except ValueError, e:
print the_item
raise e
tmp_locus_id = tmp_row_dict['LOCUS']
if 'TRANSCRIPT' in tmp_row_dict:
tmp_locus_id = tmp_locus_id + '.' + tmp_row_dict['TRANSCRIPT']
gene_id_to_object[tmp_locus_id].name = tmp_row_dict['ABBREVIATION']
if reaction_note_dict.has_key('SUBSYSTEM'):
reaction.subsystem = reaction_note_dict['SUBSYSTEM'][0]
reaction.reversibility = int(sbml_reaction.getReversible())
#TODO: Use the cobra.metabolite objects here.
reaction.add_metabolites(cobra_metabolites)
if print_time:
print 'Parsing %s took %1.2f seconds'%('reactions',
time()-start_time)
if print_time:
start_time = time()
#Now, add all of the reactions to the model.
cobra_model = Model(sbml_model_id)
cobra_model.description = sbml_model.getId()
#Populate the compartment list - This will be done based on cobra.Metabolites
#in cobra.Reactions in the future.
cobra_model.compartments = compartment_dict
cobra_model.add_reactions(cobra_reaction_list)
if print_time:
print '%s took %1.2f seconds'%('Adding reactions',
time()-start_time)
#cobra_model.update_rules()
return cobra_model
[docs]def parse_legacy_sbml_notes(note_string, note_delimiter = ':'):
"""Deal with legacy SBML format issues arising from the
COBRA Toolbox for MATLAB and BiGG.ucsd.edu developers.
"""
note_dict = {}
start_tag = '<p>'
end_tag = '</p>'
if '<html:p>' in note_string:
start_tag = '<html:p>'
end_tag = '</html:p>'
while start_tag in note_string and end_tag in note_string:
note_start = note_string.index(start_tag)
note_end = note_string.index(end_tag)
the_note = note_string[(note_start + len(start_tag)):note_end].lstrip(' ').rstrip(' ')
if note_delimiter in the_note:
note_delimiter_index = the_note.index(note_delimiter)
note_field = the_note[:note_delimiter_index].lstrip(' ').rstrip(' ').replace('_',' ').upper()
note_value = the_note[(note_delimiter_index+1):].lstrip(' ').rstrip(' ')
if note_dict.has_key(note_field ):
note_dict[note_field ].append(note_value)
else:
note_dict[note_field] = [note_value]
note_string = note_string[(note_end+len(end_tag)): ]
return note_dict
[docs]def write_cobra_model_to_sbml_file(cobra_model, sbml_filename,
sbml_level=2, sbml_version=1,
print_time=False):
"""Write a cobra.Model object to an SBML XML file.
cobra_model: A cobra.Model object
sbml_filename: The file to write the SBML XML to.
sbml_level: 2 is the only level supported at the moment.
sbml_version: 1 is the only version supported at the moment.
print_time: Boolean. Print the time requirements for different sections
TODO: Update the NOTES to match the SBML standard and provide support for
Level 2 Version 4
"""
#Add in the common compartment abbreviations. If there are additional compartments
#they also need to be added.
note_start_tag, note_end_tag = '<p>', '</p>'
if sbml_level > 2 or (sbml_level == 2 and sbml_version == 4):
note_start_tag, note_end_tag = '<html:p>', '</html:p>'
if not hasattr(cobra_model, 'compartments'):
cobra_model.compartments = {'c': 'cytosol',
'p': 'periplasm',
'e': 'extracellular'}
sbml_doc = SBMLDocument(sbml_level, sbml_version)
sbml_model = sbml_doc.createModel(cobra_model.description.split('.')[0])
#Note need to set units
reaction_units = 'mmol_per_gDW_per_hr'
model_units = sbml_model.createUnitDefinition()
model_units.setId(reaction_units)
sbml_unit = model_units.createUnit()
sbml_unit.setKind(UNIT_KIND_MOLE)
sbml_unit.setScale(-3)
sbml_unit = model_units.createUnit()
sbml_unit.setKind(UNIT_KIND_GRAM)
sbml_unit.setExponent(-1)
sbml_unit = model_units.createUnit()
sbml_unit.setKind(UNIT_KIND_SECOND)
sbml_unit.setMultiplier(1.0/60/60)
sbml_unit.setExponent(-1)
for the_key in cobra_model.compartments.keys():
sbml_comp = sbml_model.createCompartment()
sbml_comp.setId(the_key)
sbml_comp.setName(cobra_model.compartments[the_key])
sbml_comp.setSize(1) #Just to get rid of warnings
if print_time:
start_time = time()
#Use this dict to allow for fast look up of species id
#for references created in the reaction section.
metabolite_dict = {}
for cobra_metabolite in cobra_model.metabolites:
metabolite_dict[cobra_metabolite.id] = add_sbml_species(sbml_model,
cobra_metabolite,
note_start_tag=note_start_tag,
note_end_tag=note_end_tag)
if print_time:
print 'Adding %s took %1.2f seconds'%('metabolites',
time()-start_time)
if print_time:
start_time = time()
for the_reaction in cobra_model.reactions:
#This is probably the culprit. Including cobra.Reaction
#objects explicitly in cobra.Model will speed this up.
sbml_reaction = sbml_model.createReaction()
#Need to remove - for proper SBML. Replace with __
the_reaction_id = 'R_' + the_reaction.id.replace('-','__' )
sbml_reaction.setId(the_reaction_id)
if the_reaction.reversibility == 1:
sbml_reaction.setReversible(True)
else:
sbml_reaction.setReversible(False)
if the_reaction.name:
sbml_reaction.setName(the_reaction.name)
else:
sbml_reaction.setName(the_reaction.id)
#Add in the reactant/product references
for the_metabolite, the_coefficient in the_reaction._metabolites.items():
sbml_stoichiometry = the_coefficient
metabolite_id = str(metabolite_dict[the_metabolite.id])
#Each SpeciesReference must have a unique id
if sbml_stoichiometry < 0:
species_reference = sbml_reaction.createReactant()
else:
species_reference = sbml_reaction.createProduct()
species_reference.setId(metabolite_id + '_' + the_reaction_id)
species_reference.setSpecies(metabolite_id)
species_reference.setStoichiometry(abs(sbml_stoichiometry))
#Deal with the case where the reaction is a boundary reaction
if len(the_reaction._metabolites) == 1:
the_metabolite, the_coefficient = the_reaction._metabolites.items()[0]
the_metabolite = the_metabolite.copy()
metabolite_id = add_sbml_species(sbml_model, the_metabolite,
note_start_tag=note_start_tag,
note_end_tag=note_end_tag,
boundary_metabolite=True)
the_coefficient *= -1
#Each SpeciesReference must have a unique id
if sbml_stoichiometry < 0:
species_reference = sbml_reaction.createReactant()
else:
species_reference = sbml_reaction.createProduct()
species_reference.setId(metabolite_id + '_' + the_reaction_id)
species_reference.setSpecies(metabolite_id)
species_reference.setStoichiometry(abs(sbml_stoichiometry))
#Add in the kineticLaw
sbml_law = KineticLaw(sbml_level, sbml_version)
if hasattr(sbml_law, 'setId'):
sbml_law.setId('FLUX_VALUE')
sbml_law.setFormula('FLUX_VALUE')
reaction_parameter_dict = {'LOWER_BOUND': [the_reaction.lower_bound, reaction_units],
'UPPER_BOUND': [the_reaction.upper_bound, reaction_units],
'FLUX_VALUE': [0, reaction_units],
'OBJECTIVE_COEFFICIENT': [the_reaction.objective_coefficient,
'dimensionless']}
for k, v in reaction_parameter_dict.items():
sbml_parameter = Parameter(sbml_level, sbml_version)
sbml_parameter.setId(k)
if hasattr(v, '__iter__'):
sbml_parameter.setValue(v[0])
sbml_parameter.setUnits(v[1])
else:
sbml_parameter.setValue(v)
sbml_law.addParameter(sbml_parameter)
sbml_reaction.setKineticLaw(sbml_law)
sbml_reaction.setNotes('<html xmlns="http://www.w3.org/1999/xhtml">%sGENE_ASSOCIATION: %s%s%sSUBSYSTEM: %s%s</html>'%(note_start_tag,
the_reaction.gene_reaction_rule,
note_end_tag,
note_start_tag,
the_reaction.subsystem,
note_end_tag))
if print_time:
print 'Adding %s took %1.2f seconds'%('reactions',
time()-start_time)
writeSBML(sbml_doc, sbml_filename)
[docs]def add_sbml_species(sbml_model, cobra_metabolite, note_start_tag, note_end_tag, boundary_metabolite=False):
"""A helper function for adding cobra metabolites to an sbml model.
sbml_model: sbml_model object
cobra_metabolite: a cobra.Metabolite object
note_start_tag: the start tag for parsing cobra notes. this will eventually
be supplanted when COBRA is worked into sbml.
note_end_tag: the end tag for parsing cobra notes. this will eventually
be supplanted when COBRA is worked into sbml.
"""
sbml_species = sbml_model.createSpecies()
the_id = 'M_' + cobra_metabolite.id.replace('-', '__')
#Deal with legacy naming issues
the_compartment = cobra_metabolite.compartment
if the_id.endswith('[%s]'%the_compartment):
the_id = the_id[:-len('[%s]'%the_compartment)]
elif not the_id.endswith('_%s'%the_compartment):
the_id += '_%s'%the_compartment
if boundary_metabolite:
the_id += '_boundary'
sbml_species.setId(the_id)
metabolite_id = the_id
if boundary_metabolite:
sbml_species.setBoundaryCondition(True)
if cobra_metabolite.name:
sbml_species.setName(cobra_metabolite.name)
else:
sbml_species.setName(cobra_metabolite.id)
try:
sbml_species.setCompartment(the_compartment)
except:
print 'metabolite failed: %s'%the_id
return cobra_metabolite
if cobra_metabolite.charge is not None:
sbml_species.setCharge(cobra_metabolite.charge)
if hasattr(cobra_metabolite.formula, 'id') or \
hasattr(cobra_metabolite.notes, 'items'):
tmp_note = '<html xmlns="http://www.w3.org/1999/xhtml">'
if hasattr(cobra_metabolite.formula, 'id'):
tmp_note += '%sFORMULA: %s%s'%(note_start_tag,
cobra_metabolite.formula.id,
note_end_tag)
if hasattr(cobra_metabolite.notes, 'items'):
for the_id_type, the_id in cobra_metabolite.notes.items():
if not isinstance(the_id_type, str):
the_id_type = repr(the_id_type)
if hasattr(the_id, '__iter__') and len(the_id) == 1:
the_id = the_id[0]
if not isinstance(the_id, str):
the_id = repr(the_id)
tmp_note += '%s%s: %s%s'%(note_start_tag,
the_id_type,
the_id, note_end_tag)
sbml_species.setNotes(tmp_note + '</html>')
return metabolite_id
[docs]def fix_legacy_id(id, use_hyphens=False):
id = id.replace('_DASH_', '__')
id = id.replace('_FSLASH_', '/')
id = id.replace('_BSLASH_', "\\")
id = id.replace('_LPAREN_', '(')
id = id.replace('_LSQBKT_', '[')
id = id.replace('_RSQBKT_', ']')
id = id.replace('_RPAREN_', ')')
id = id.replace('_COMMA_', ',')
id = id.replace('_PERIOD_', '.')
id = id.replace('_APOS_', "'")
id = id.replace('&', '&')
id = id.replace('<', '<')
id = id.replace('>', '>')
id = id.replace('"', '"')
if use_hyphens:
id = id.replace('__', '-')
else:
id = id.replace("-", "__")
return id
[docs]def read_legacy_sbml(filename, use_hyphens=False):
"""read in an sbml file and fix the sbml id's"""
model = create_cobra_model_from_sbml_file(filename)
for metabolite in model.metabolites:
metabolite.id = fix_legacy_id(metabolite.id)
model.metabolites._generate_index()
for reaction in model.reactions:
reaction.id = fix_legacy_id(reaction.id)
if reaction.id.startswith("EX_") and reaction.id.endswith("(e)"):
reaction.id = reaction.id[:-3] + "_e"
reaction.reconstruct_reaction()
model.reactions._generate_index()
return model