Source code for pycroc.scanpy_to_pycroc

import pandas as pd
import numpy as np
from anndata import AnnData

def scanpy_to_pycroc(AnnData, gene_list=None, obs_name=None, case_class=None, case_label=None):
    """
    Convert a scanpy AnnData object into a pycroc data frame.

    The genes of ``gene_list`` that are present in ``AnnData.var_names`` are
    selected (in sorted order) and their expression values are retrieved.
    Requested genes that are not in ``AnnData.var_names`` are silently
    dropped. If ``obs_name`` is given, a ``Class`` column is added to produce
    a labelled (training) dataset; otherwise the result is unlabelled.

    Parameters
    ----------
    AnnData: AnnData
        a scanpy AnnData.
    gene_list: list(str), optional
        a list of gene names. ``None`` (the default) or an empty list
        selects no gene.
    obs_name: str, optional
        name of the AnnData.obs column to be used for labelling. If ``None``
        the returned data is unlabelled and ``case_class`` / ``case_label``
        are ignored.
    case_class: str or list(str)
        one or more categories of the obs selected with ``obs_name`` to be
        set as case class. Required when ``obs_name`` is given.
    case_label: str
        the label assigned to observations belonging to the case class; all
        the other observations are labelled ``'Others'``. Required when
        ``obs_name`` is given.

    Returns
    -------
    data (pandas.DataFrame):
        a data frame with an ``ID`` column (the AnnData.obs index), an
        optional ``Class`` column and one column per selected gene.

    Raises
    ------
    TypeError
        if ``obs_name`` is given and ``case_class`` is neither a str nor a
        list.
    """
    # The public parameter name shadows the AnnData class; use a local alias
    # so the body reads unambiguously.
    adata = AnnData

    # A None default avoids sharing one mutable list across calls.
    requested = [] if gene_list is None else gene_list
    genes = sorted(g for g in requested if g in adata.var_names)

    expression = adata[:, genes].X
    # Objects exposing toarray() (e.g. scipy sparse matrices) are densified,
    # since the DataFrame constructor expects array-like input.
    if hasattr(expression, "toarray"):
        expression = expression.toarray()
    markers = pd.DataFrame(expression, columns=genes)

    # Positional (0..n-1) index so that it lines up with `markers` in concat.
    ids = pd.DataFrame({"ID": adata.obs.index})

    if obs_name is not None:
        if isinstance(case_class, str):
            wanted = [case_class]
        elif isinstance(case_class, list):
            wanted = case_class
        else:
            raise TypeError("case_class must be a str or a list(str)")

        labels = adata.obs[obs_name]
        # Start from an all-False mask so an empty category list still yields
        # one value per observation.
        is_case = np.zeros(len(labels), dtype=bool)
        for category in wanted:
            is_case = is_case | np.asarray(labels == category)

        # Build the column in one assignment instead of an in-place replace()
        # on a column selection, which does not propagate to the parent frame
        # under pandas Copy-on-Write.
        ids["Class"] = pd.Series(is_case).map({True: case_label, False: "Others"})

    return pd.concat([ids, markers], axis=1)