#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Jun 16 20:50:39 2019
@author: mwu
"""
from __future__ import absolute_import
import networkx as nx
import pandas as pd
from community import community_louvain
import snf
import numpy as np
import warnings
import networkx.algorithms.traversal as nextra
from ._de_bruijn import construct_graph, output_contigs
from ._random_walk import supervised_random_walk
from ._ensemble_classifier import _generate_x_y, ensemble_classifier
def __init__():
    """
    Silence every warning category for the rest of the process.
    ------------------------------------------------------------
    Installs an "ignore" filter at the front of the global warnings filter list.
    :return: None
    """
    warnings.simplefilter(action="ignore")
def __linkage_to_adjlink(linkage_table, node_list):
    """
    Convert a linkage table into a symmetric weighted adjacency matrix.
    --------------------------------------------------------------------
    :param linkage_table: dataframe with 'source', 'target' and 'weight' columns.
    :param node_list: list of node labels; defines row/column order of the matrix.
                      Labels are assumed to be unique and hashable.
    :return: numpy float array of shape (len(node_list), len(node_list)); entry [i, j]
             holds the weight of the link between node_list[i] and node_list[j], 0 if none.
    """
    # Map each label to its row/column position once, so lookups are O(1).
    position = {node: idx for idx, node in enumerate(node_list)}
    size = len(node_list)
    # Builtin float replaces np.float, which was removed from NumPy.
    adjlink_matrix = np.zeros((size, size), dtype=float)

    # Iterate over values rather than `column[i]`, so a table whose index is not
    # 0..n-1 (e.g. after sort_values/head) is still read row by row.
    rows = zip(linkage_table['source'], linkage_table['target'], linkage_table['weight'])
    for source, target, weight in rows:
        src_idx = position.get(source)
        tgt_idx = position.get(target)
        if src_idx is None or tgt_idx is None:
            # Skip links touching unknown nodes; stopping here would silently
            # drop every later link in the table.
            continue
        # Direct array writes avoid chained `.loc[a][b] = w` assignment, which
        # may write into a temporary copy.
        adjlink_matrix[src_idx, tgt_idx] = weight
        adjlink_matrix[tgt_idx, src_idx] = weight
    return adjlink_matrix
def __snf_based_merge(link_1, link_2):
    """
    Fuse two linkage tables with similarity network fusion (SNF),
    following Wang, Bo, et al. Nature methods 11.3 (2014): 333.
    ------------------------------------------------------------
    :param link_1: dataframe with 'source', 'target' and 'weight' columns.
    :param link_2: dataframe with 'source', 'target' and 'weight' columns.
    :return: dataframe with 'source' and 'target' columns listing the edges of the
             fused network (no weight column is returned).
    """
    # Note: this switches warnings off for the whole process, not only for this call.
    warnings.simplefilter("ignore")

    # Keep only nodes that occur as a source AND as a target in both tables.
    shared_nodes = set(link_1['source']) & set(link_1['target'])
    shared_nodes = shared_nodes & set(link_2['source'])
    shared_nodes = shared_nodes & set(link_2['target'])
    node_list = list(shared_nodes)

    # One adjacency matrix per input table, both ordered by node_list.
    adjlinks = [__linkage_to_adjlink(table, node_list) for table in (link_1, link_2)]

    affinity_matrix = snf.make_affinity(adjlinks)
    fused_network = snf.snf(affinity_matrix)

    fused_frame = pd.DataFrame(fused_network, index=node_list, columns=node_list)
    Graph = nx.from_pandas_adjacency(fused_frame)
    return pd.DataFrame(Graph.edges, columns=['source', 'target'])
def buildnet(gnetdata, key_links, top=None):
    """
    Build a gene correlation graph from one linkage table.
    ----------------------------------------------------
    :param gnetdata: Gnetdata object.
    :param key_links: str, key in NetAttrs of the linkage table used for building the graph.
    :param top: int, default None. number of highest-weight links to keep; None keeps all.
    :return: Gnetdata object with 'graph' added into NetAttrs and 'top' recorded as a parameter.
    """
    linkage = gnetdata.NetAttrs[key_links]
    if top is None:
        # No cut-off requested: use every row of the table.
        top = linkage.shape[0]

    ranked = linkage.sort_values('weight', ascending=False)
    links_filter = ranked.head(top)

    # edge_attr=True attaches every remaining column as an edge attribute.
    G = nx.from_pandas_edgelist(links_filter, source="source", target="target", edge_attr=True)

    gnetdata._add_netattr('graph', G)
    gnetdata._add_netattr_para('top', str(top))
    print('graph added into NetAttrs')
    return gnetdata
def get_centrality(gnetdata):
    """
    Compute node centrality measures for the stored graph.
    ---------------------------------------
    :param gnetdata: Gnetdata object; NetAttrs['graph'] must already hold the graph.
    :return: Gnetdata object with a 'centralities' dataframe (columns: node, betweenness,
             closeness, degree, pageRank) stored in NetAttrs.
    """
    G = gnetdata.NetAttrs['graph']
    centralities = pd.DataFrame(list(G.nodes), columns=['node'])

    # Column label -> networkx function, evaluated in this order.
    measures = (
        ('betweenness', nx.betweenness_centrality),
        ('closeness', nx.closeness_centrality),
        ('degree', nx.degree_centrality),
        ('pageRank', nx.pagerank),
    )
    for label, measure in measures:
        scores = measure(G)
        # Column 1 of the (node, score) pairs holds the scores. They are attached by
        # position, i.e. in the iteration order of the returned dict
        # (presumably the same order as G.nodes; verify against networkx).
        centralities[label] = pd.DataFrame(list(scores.items()))[1]

    gnetdata.NetAttrs['centralities'] = centralities
    print('node centralities added into NetAttrs.')
    return gnetdata
def find_consensus_graph(gnetdata, link_key='all', method='intersection', toprank=100, threshold=None, **kwargs):
    """
    Given multiple linkage tables, it predicts consensus links.
    ------------------------------------------------------------
    :param gnetdata: Gnetdata object.
    :param link_key: str or list of str, default all. key(s) referring to linkage tables in NetAttrs.
                     'all' selects every NetAttrs key containing 'links'.
    :param method: str, default intersection. one of intersection, snf, ensemble.
                   Note: intersection is recommended when there are less than 3 linkage tables;
                   snf fuses at most two linkage tables.
    :param toprank: int, default 100. top ranked edges for intersection method.
    :param threshold: int, default None. set threshold for ensemble method (required for ensemble).
    :param kwargs: extra keyword arguments forwarded to ensemble_classifier.
    :return: Gnetdata object with consensus links added into NetAttrs.
    :raises ValueError: if no linkage table is selected, if snf is requested for more than
                        two tables, or if threshold is None with the ensemble method.
    """
    assert method in ['intersection', 'snf', 'ensemble'], 'only following methods acceptable : intersection, snf, ensemble'

    # Normalise link_key to a list of keys. A bare string must be wrapped,
    # otherwise keys[0] would be its first character.
    if isinstance(link_key, str):
        if link_key == 'all':
            keys = [key for key in gnetdata.NetAttrs.keys() if 'links' in key]
        else:
            keys = [link_key]
    else:
        keys = list(link_key)

    if not keys:
        raise ValueError('no linkage table found in NetAttrs for link_key={!r}'.format(link_key))

    if method in ('intersection', 'snf'):
        if method == 'snf' and len(keys) > 2:
            # The snf merge returns edges without a 'weight' column, so its output
            # cannot be fed into another snf merge.
            raise ValueError('snf method supports at most two linkage tables, got {}'.format(len(keys)))
        # Fold the tables pairwise; with a single key the table is returned as is.
        merged_links = gnetdata.NetAttrs[keys[0]]
        for key in keys[1:]:
            merged_links = graph_merge(merged_links, gnetdata.NetAttrs[key], method=method, toprank=toprank)
    else:
        # method == 'ensemble'
        if threshold is None:
            raise ValueError('threshold cannot be none!')
        links_dict = {key: value for key, value in gnetdata.NetAttrs.items() if key in keys}
        X, Y = _generate_x_y(links_dict, threshold)
        merged_links = ensemble_classifier(X, Y, **kwargs)

    print('there are {} consensus edges found!'.format(merged_links.shape[0]))
    gnetdata._add_netattr('consensus', merged_links)
    return gnetdata
def graph_merge(link_1, link_2, toprank=None, method='union'):
    """
    Given two graphs, it returns merged graph.
    ----------------------------------------------
    :param link_1: dataframe. linkage table of graph_1 ('source', 'target', 'weight' columns)
    :param link_2: dataframe. linkage table of graph_2 ('source', 'target', 'weight' columns)
    :param toprank: int, default None. top edges (by weight) taken from each table before merging;
                    None uses the row count of the smaller table. Ignored by the snf method.
    :param method: str, default union. methods:[union, intersection, snf]. snf refers to similarity network fusion.
    :return: dataframe, merged linkage. For union/intersection every merged edge gets weight 1;
             for snf the result of the snf merge is returned unchanged.
    """
    assert method in ['union', 'intersection', 'snf'], 'valid method parameter: union, intersection, snf!'

    if method == 'snf':
        return __snf_based_merge(link_1, link_2)

    if toprank is None:
        toprank = min(link_1.shape[0], link_2.shape[0])
    top_1 = link_1.sort_values('weight', ascending=False).head(toprank)
    top_2 = link_2.sort_values('weight', ascending=False).head(toprank)

    # Drop 'weight' by name rather than by position, so the join keys are right
    # even when 'weight' is not the last column of a table.
    how = 'outer' if method == 'union' else 'inner'
    mergedlinks = pd.merge(top_1.drop(columns='weight'), top_2.drop(columns='weight'), how=how)

    # Original weights are not comparable across tables; mark every kept edge with 1.
    mergedlinks['weight'] = np.repeat(1, mergedlinks.shape[0])
    return mergedlinks
def graph_traveral(graph, start, threshold, method='bfs'):
    """
    Given a graph, it provides graph traversal techniques including breadth-first search (bfs)
    and depth-first search (dfs) to explore hidden gene/tf associations.
    -----------------------------------------------------------------------------
    :param graph: network graph object.
    :param start: str. starting point of graph.
    :type start: str
    :param threshold: int. the depth-limit
    :param method: str. bfs or dfs
    :return: explored graph (the tree returned by networkx bfs_tree / dfs_tree).
    """
    assert method in ['bfs', 'dfs'], 'valid method parameters: bfs, dfs!'
    # Pass the limit by keyword: the third positional parameter of bfs_tree is
    # `reverse`, not `depth_limit`, so a positional threshold would be ignored.
    if method == 'bfs':
        res_path = nextra.bfs_tree(graph, start, depth_limit=threshold)
    else:
        res_path = nextra.dfs_tree(graph, start, depth_limit=threshold)
    return res_path
def random_walk(gnetdata, start, supervisedby, steps):
    """
    Run a supervised random walk guided by a node centrality attribute.
    ------------------------------------------------------------
    Thin wrapper that forwards all arguments to supervised_random_walk.
    :param gnetdata: Gnetdata object
    :param start: str, starting point of graph.
    :param supervisedby: str, 'betweenness', 'closeness', 'degree' or 'pageRank'
    :param steps: int, number of steps.
    :return: the result of supervised_random_walk (documented as a list of travelled nodes).
    """
    return supervised_random_walk(
        gnetdata=gnetdata,
        start=start,
        supervisedby=supervisedby,
        steps=steps,
    )
def path_merge(path_1, path_2, k_mer=3, path='Eulerian'):
    """
    TODO: perform de Bruijn graph mapping for the two given path lists.
    ------------------------------------------------------------
    :param path_1: first path list.
    :param path_2: second path list.
    :param k_mer: int, default 3. passed to construct_graph as its second argument.
    :param path: str, default Eulerian. currently unused.
    :return: the result of output_contigs on the constructed graph.
    """
    paths = [path_1, path_2]
    de_bruijn_graph = construct_graph(paths, k_mer)
    return output_contigs(de_bruijn_graph)