from __future__ import with_statement
#cobra.flux_analysis.double_deletion.py
#runs flux variablity analysis on a Model object.
from math import floor,ceil
from numpy import zeros #only item to replace to gain jython compatibility
from copy import deepcopy
from warnings import warn
from os import name as __name
nan = float('nan')
from sys import modules as __modules
from warnings import warn
if __name == 'java':
raise Exception("%s is not yet supported on jython"%__modules[__name__])
warn("moma is not supported on %s"%__name)
def moma(**kwargs):
warn("moma is not supported on %s"%__name)
else:
from .moma import moma
try:
from cPickle import dump
except:
from pickle import dump
try:
from cobra.external.ppmap import ppmap
__parallel_mode_available = True
except:
__parallel_mode_available = False
from ..manipulation import initialize_growth_medium
from ..manipulation import delete_model_genes, undelete_model_genes
def __double_deletion_parallel(cobra_model, number_of_processes=4,
elements_of_interest=None, method='fba',
the_problem='return', element_type='gene',
solver='glpk',
error_reporting=None):
"""Wrapper for double_gene_deletion_parallel and the currently
unimplemented double_reaction_deletion_parallel functions
cobra_model: a cobra.Model object
number_of_processes: is the number of parallel processes to start
elements_of_interest: Is None, a list of genes, or a list of two lists of
genes. If None then double_deletion is run on all genes in
the_model.genes. If a list of genes then double_deletion is run for all
combinations of genes in double_deletion. If a list of of two lists of
genes then double_deletion is run for each member of one list vs. each
member of the second list.
method: 'fba' or 'moma' to run flux balance analysis or minimization
of metabolic adjustments.
the_problem: Is None or 'reuse'
element_type: 'gene' or 'reaction'
solver: 'glpk', 'gurobi', or 'cplex'.
error_reporting: None or True.
Returns a dictionary of the genes in the x dimension (x), the y
dimension (y), and the growth simulation data (data).
"""
if not __parallel_mode_available:
print 'Parallel mode not available is Parallel Python installed?'
return
if the_problem:
#The solver model objects are not thread safe so change the_problem
#to 'return' if one is provided.
the_problem='return'
if element_type == 'gene':
return __double_gene_deletion_parallel(cobra_model, number_of_processes=number_of_processes,
genes_of_interest=elements_of_interest,
method=method,
the_problem=the_problem, solver=solver,
error_reporting=error_reporting)
else:
raise Exception("Double deletion not yet implemented for element_type = %s"%element_type)
[docs]def double_deletion(cobra_model, element_list_1=None, element_list_2=None,
method='fba', single_deletion_growth_dict=None, the_problem='return',
element_type='gene', solver='glpk', error_reporting=None,
number_of_processes=1):
"""Wrapper for double_gene_deletion and the currently unimplemented
double_reaction_deletion functions
cobra_model: a cobra.Model object
element_list_1: Is None or a list of elements (genes or reactions)
element_list_2: Is None or a list of elements (genes or reactions)
method: 'fba' or 'moma' to run flux balance analysis or minimization
of metabolic adjustments.
single_deletion_growth_dict: A dictionary that provides the growth
rate information for single gene knock outs. This can speed up
simulations because nonviable single deletion strains imply that all
double deletion strains will also be nonviable.
the_problem: Is None or 'reuse'
element_type: 'gene' or 'reaction'
solver: 'glpk', 'gurobi', or 'cplex'.
error_reporting: None or True
Returns a dictionary of the elements in the x dimension (x), the y
dimension (y), and the growth simulation data (data).
"""
if number_of_processes > 1:
elements_of_interest = [x for x in [element_list_1,
element_list_2]
if x is not None]
if len(elements_of_interest) == 0:
elements_of_interest = None
return __double_deletion_parallel(cobra_model, number_of_processes=number_of_processes,
elements_of_interest=elements_of_interest, method=method,
the_problem=the_problem, solver=solver, element_type=element_type,
error_reporting=error_reporting)
else:
if element_type == 'gene':
return double_gene_deletion(cobra_model, gene_list_1=element_list_1,
gene_list_2=element_list_2, method=method,
single_deletion_growth_dict=single_deletion_growth_dict,
the_problem=the_problem, solver=solver,
error_reporting=error_reporting)
else:
raise Exception("Double deletion not yet implemented for element_type = %s"%element_type)
[docs]def double_gene_deletion(cobra_model, gene_list_1=None, gene_list_2=None,
method='fba', single_deletion_growth_dict=None,
the_problem='return', solver='glpk', growth_tolerance=1e-8,
error_reporting=None):
"""This will disable reactions for all gene pairs from gene_list_1 and
gene_list_2 and then run simulations to optimize for the objective
function. The contribution of each reaction to the objective function
is indicated in cobra_model.reactions[:].objective_coefficient vector.
NOTE: We've assumed that there is no such thing as a synthetic rescue with
this modeling framework.
cobra_model: a cobra.Model object
gene_list_1: Is None or a list of genes. If None then both gene_list_1
and gene_list_2 are assumed to correspond to cobra_model.genes.
gene_list_2: Is None or a list of genes. If None then gene_list_2 is
assumed to correspond to gene_list_1.
method: 'fba' or 'moma' to run flux balance analysis or minimization
of metabolic adjustments.
single_deletion_growth_dict: A dictionary that provides the growth
rate information for single gene knock outs. This can speed up
simulations because nonviable single deletion strains imply that all
double deletion strains will also be nonviable.
the_problem: Is None, 'return', or an LP model object for the solver.
solver: 'glpk', 'gurobi', or 'cplex'.
error_reporting: None or True
growth_tolerance: float. The effective lower bound on the growth rate
for a single deletion that is still considered capable of growth.
Returns a dictionary of the gene ids in the x dimension (x) and the y
dimension (y), and the growth simulation data (data).
"""
#BUG: Since this might be called from ppmap, the modules need to
#be imported. Modify ppmap to take depfuncs
from numpy import zeros
nan = float('nan')
from cobra.flux_analysis.single_deletion import single_deletion
from cobra.manipulation import initialize_growth_medium
from cobra.manipulation import delete_model_genes, undelete_model_genes
##TODO: Use keywords instead
if isinstance(cobra_model, dict):
tmp_dict = cobra_model
cobra_model = tmp_dict['cobra_model']
if 'gene_list_1' in tmp_dict:
gene_list_1 = tmp_dict['gene_list_1']
if 'gene_list_2' in tmp_dict:
gene_list_2 = tmp_dict['gene_list_2']
if 'method' in tmp_dict:
method = tmp_dict['method']
if 'the_problem' in tmp_dict:
the_problem = tmp_dict['the_problem']
if 'single_deletion_growth_dict' in tmp_dict:
single_deletion_growth_dict = tmp_dict['single_deletion_growth_dict']
if 'solver' in tmp_dict:
solver = tmp_dict['solver']
if 'error_reporting' in tmp_dict:
error_reporting = tmp_dict['error_reporting']
else:
cobra_model = cobra_model
#this is a slow way to revert models.
wt_model = cobra_model #NOTE: It may no longer be necessary to use a wt_model
#due to undelete_model_genes
if gene_list_1 is None:
gene_list_1 = cobra_model.genes
elif not hasattr(gene_list_1[0], 'id'):
gene_list_1 = map(cobra_model.genes.get_by_id, gene_list_1)
#Get default values to use if the deletions do not alter any reactions
the_problem = cobra_model.optimize(the_problem=the_problem, solver=solver)
basal_f = cobra_model.solution.f
if method.lower() == 'moma':
wt_model = cobra_model.copy()
the_problem = 'return'
combined_model = None
single_gene_set = set(gene_list_1)
if gene_list_2 is not None:
if not hasattr(gene_list_2[0], 'id'):
gene_list_2 = map(cobra_model.genes.get_by_id, gene_list_2)
single_gene_set.update(gene_list_2)
#Run the single deletion analysis to account for double deletions that
#target the same gene and lethal deletions. We assume that there
#aren't synthetic rescues.
single_deletion_growth_dict = single_deletion(cobra_model,
list(single_gene_set),
method=method,
the_problem=the_problem,
solver=solver,
error_reporting=error_reporting)[0]
if gene_list_2 is None or gene_list_1 == gene_list_2:
number_of_genes = len(gene_list_1)
gene_list_2 = gene_list_1
deletion_array = zeros([number_of_genes, number_of_genes])
##TODO: Speed up this triangular process
#For the case where the contents of the lists are the same cut the work in half.
#There might be a faster way to do this by using a triangular array function
#in numpy
#Populate the diagonal from the single deletion lists
for i, the_gene in enumerate(gene_list_1):
deletion_array[i, i] = single_deletion_growth_dict[the_gene.id]
for i, gene_1 in enumerate(gene_list_1[:-1]):
#TODO: Since there cannot be synthetic rescues we can assume
#that the whole row for a lethal deletion
#will be equal to that deletion.
if single_deletion_growth_dict[gene_1.id] < growth_tolerance:
tmp_solution = single_deletion_growth_dict[gene_1.id]
for j in range(i+1, number_of_genes):
deletion_array[j, i] = deletion_array[i, j] = tmp_solution
else:
for j, gene_2 in enumerate(gene_list_1[i+1:], i+1):
if single_deletion_growth_dict[gene_2.id] < growth_tolerance:
tmp_solution = single_deletion_growth_dict[gene_2.id]
else:
delete_model_genes(cobra_model, [gene_1, gene_2])
if cobra_model._trimmed:
if method.lower() == 'fba':
#Assumes that the majority of perturbations don't change
#reactions which is probably false
cobra_model.optimize(the_problem = the_problem,
solver=solver, error_reporting=error_reporting)
the_status = cobra_model.solution.status
tmp_solution = cobra_model.solution.f
elif method.lower() == 'moma':
try:
moma_solution = moma(wt_model, cobra_model,
combined_model=combined_model,
solver=solver, the_problem=the_problem)
tmp_solution = float(moma_solution.pop('objective_value'))
the_problem = moma_solution.pop('the_problem')
the_status = moma_solution.pop('status')
combined_model = moma_solution.pop('combined_model')
del moma_solution
except:
tmp_solution = nan
the_status = 'failed'
if the_status not in ['opt', 'optimal'] and \
error_reporting:
print '%s / %s: %s status: %s'%(gene_1, gene_2, solver,
the_status)
#Reset the model to orginial form.
undelete_model_genes(cobra_model)
else:
tmp_solution = basal_f
deletion_array[j, i] = deletion_array[i, j] = tmp_solution
else:
deletion_array = zeros([len(gene_list_1), len(gene_list_2)])
#Now deal with the case where the gene lists are different
for i, gene_1 in enumerate(gene_list_1):
if single_deletion_growth_dict[gene_1.id] <= 0:
for j in range(len(gene_list_2)):
deletion_array[i, j] = 0.
else:
for j, gene_2 in enumerate(gene_list_2):
#Assume no such thing as a synthetic rescue
if single_deletion_growth_dict[gene_2.id] <= growth_tolerance:
tmp_solution = single_deletion_growth_dict[gene_2.id]
else:
delete_model_genes(cobra_model, [gene_1, gene_2])
if cobra_model._trimmed:
if method.lower() == 'fba':
cobra_model.optimize(the_problem=the_problem,
solver=solver, error_reporting=error_reporting)
tmp_solution = cobra_model.solution.f
the_status = cobra_model.solution.status
elif method.lower() == 'moma':
try:
moma_solution = moma(wt_model, cobra_model,
combined_model=combined_model,
solver=solver, the_problem=the_problem)
tmp_solution = float(moma_solution.pop('objective_value'))
the_problem = moma_solution.pop('the_problem')
the_status = moma_solution.pop('status')
combined_model = moma_solution.pop('combined_model')
del moma_solution
except:
tmp_solution = nan
the_status = 'failed'
if the_status not in ['opt', 'optimal'] and \
error_reporting:
print '%s / %s: %s status: %s'%(repr(gene_1), repr(gene_2), solver,
cobra_model.solution.status)
#Reset the model to wt form
undelete_model_genes(cobra_model)
else:
tmp_solution = basal_f
deletion_array[i, j] = tmp_solution
if hasattr(gene_list_1, 'id'):
gene_list_1 = [x.id for x in gene_list_1]
if hasattr(gene_list_2, 'id'):
gene_list_2 = [x.id for x in gene_list_2]
return({'x': gene_list_1, 'y': gene_list_2, 'data': deletion_array})
def __double_gene_deletion_parallel(cobra_model, number_of_processes=4,
genes_of_interest=None, method = 'fba',
the_problem='return', solver='glpk',
error_reporting=None):
"""Provides a wrapper to run the double_deletion function on
multicore systems.
cobra_model: a Model object
number_of_processes: is the number of parallel processes to start
genes_of_interest: Is None, a list of genes, or a list of two lists of
genes. If None then double_deletion is run on all genes in
cobra_model.genes. If a list of genes then double_deletion is run for all
combinations of genes in double_deletion. If a list of of two lists of
genes then double_deletion is run for each member of one list vs. each
member of the second list.
method: 'fba' or 'moma' to run flux balance analysis or minimization
of metabolic adjustments.
the_problem: Is None or 'reuse'
solver: 'glpk', 'gurobi', or 'cplex'.
error_reporting: None or True
returns a dictionary with the keys x, y, and data
data: A numpy array of the simulation results for the growth_rates
x: A list of the genes for the x dimension of data.
y: A list of the genes for the y dimension of y.
**NOTE: While the genes in x and y correspond to the content from the input gene_lists,
they are not guaranteed to be in the same order as the gene_lists because the subprocesses
may run at different speeds.
"""
if not __parallel_mode_available:
print 'Parallel mode not available is Parallel Python installed'
return
from numpy import vstack
if the_problem:
the_problem='return'
if not genes_of_interest:
#If no genes_of_interest are specified then assume we want to
#compare all genetic interactions in the network
second_gene_list = all_genes = [x.id for x in cobra_model.genes]
elif isinstance(genes_of_interest[0], str):
#If genes_of_interest is a list then assume the list be scanned
#for interactions with all genes in the network
all_genes = genes_of_interest
second_gene_list = all_genes
elif hasattr(genes_of_interest[0], 'id'):
#Make sure we're dealing with strings instead of objects because we
#haven't audited this for thread safety
second_gene_list = all_genes = [x.id for x in genes_of_interest]
elif hasattr(genes_of_interest[0], '__iter__'):
second_gene_list = all_genes = genes_of_interest[0]
if len(genes_of_interest) == 2:
second_gene_list = genes_of_interest[1]
if hasattr(all_genes[0], 'id'):
all_genes = [x.id for x in all_genes]
if hasattr(second_gene_list[0], 'id'):
second_gene_list = [x.id for x in second_gene_list]
#Get basic numbers to guide how the problem should be divided for parallel execution.
transpose_results = False
if len(all_genes) < len(second_gene_list):
all_genes, second_gene_list = second_gene_list, all_genes
transpose_results = True
total_gene_count = len(all_genes)
if total_gene_count < number_of_processes:
number_of_processes = total_gene_count
division_count = total_gene_count / number_of_processes
the_rows = []
for i in range(number_of_processes-1):
the_rows.append({'cobra_model': cobra_model.copy(), 'method': method,
'gene_list_1': deepcopy(all_genes[i*division_count:division_count*(i+1)]),
'gene_list_2': deepcopy(second_gene_list), 'the_problem': the_problem,
'solver': solver,
'error_reporting': error_reporting})
the_rows.append({'cobra_model': cobra_model.copy(), 'method': method,
'gene_list_1': deepcopy(all_genes[(number_of_processes-1)*division_count:]),
'gene_list_2': deepcopy(second_gene_list), 'the_problem': the_problem,
'solver': solver,
'error_reporting': error_reporting})
tmp_pp = list(ppmap(number_of_processes, double_gene_deletion, the_rows))
gene_list_x = tmp_pp[0]['x']
gene_list_y = tmp_pp[0]['y']
double_deletion_data = tmp_pp[0]['data']
if transpose_results:
gene_list_x, gene_list_y = gene_list_y, gene_list_x
double_deletion_data = double_deletion_data.transpose()
for the_result in tmp_pp[1:]:
gene_list_x += the_result['x']
double_deletion_data = vstack((double_deletion_data, the_result['data']))
#cobra_model.double_deletion_growth_rate = double_deletion_data
#cobra_model.double_deletion_genes_x = gene_list_x
#cobra_model.double_deletion_genes_y = gene_list_y
return({'x': gene_list_x, 'y': gene_list_y, 'data': double_deletion_data})