Source code for snpio.utils.misc

from typing import Dict, List, Union

import numpy as np
import pandas as pd


def get_gt2iupac() -> Dict[str, str]:
    """Return the lookup table from integer-coded genotypes to IUPAC codes.

    Genotype keys are written as ``"<allele>/<allele>"`` where the alleles
    are coded 1 = A, 2 = C, 3 = G and 4 = T. Homozygous genotypes map to the
    plain nucleotide, heterozygous genotypes map to the corresponding
    two-base IUPAC ambiguity code, and both missing-data encodings
    (``"-9/-9"`` and ``"-1/-1"``) map to ``"N"``.

    Returns:
        Dict[str, str]: A newly built dictionary keyed by genotype string
        with single-character IUPAC codes as values.
    """
    homozygous = {"1/1": "A", "2/2": "C", "3/3": "G", "4/4": "T"}

    heterozygous = {
        "1/2": "M",  # A/C
        "1/3": "R",  # A/G
        "1/4": "W",  # A/T
        "2/3": "S",  # C/G
        "2/4": "Y",  # C/T
        "3/4": "K",  # G/T
    }

    # Two different sentinels are accepted for missing data.
    missing = {"-9/-9": "N", "-1/-1": "N"}

    return {**homozygous, **heterozygous, **missing}
def get_iupac2gt() -> Dict[str, str]:
    """Return the lookup table from IUPAC codes to integer-coded genotypes.

    Values are ``"<allele>/<allele>"`` strings with alleles coded 1 = A,
    2 = C, 3 = G and 4 = T. The missing-data code ``"N"`` maps to
    ``"-9/-9"``.

    Returns:
        Dict[str, str]: A newly built dictionary keyed by single-character
        IUPAC code with genotype strings as values.
    """
    code_genotype_pairs = (
        # Unambiguous bases (homozygous genotypes).
        ("A", "1/1"),
        ("C", "2/2"),
        ("G", "3/3"),
        ("T", "4/4"),
        # Ambiguity codes (heterozygous genotypes).
        ("M", "1/2"),  # A/C
        ("R", "1/3"),  # A/G
        ("W", "1/4"),  # A/T
        ("S", "2/3"),  # C/G
        ("Y", "2/4"),  # C/T
        ("K", "3/4"),  # G/T
        # Missing data.
        ("N", "-9/-9"),
    )
    return dict(code_genotype_pairs)
def get_int_iupac_dict() -> Dict[str, int]:
    """Return the lookup table from IUPAC codes to integer encodings.

    The four unambiguous bases and six two-base ambiguity codes are numbered
    0 through 9 in the order A, T, G, C, W, R, M, K, Y, S. The missing-data
    code ``"N"`` is encoded as -9.

    Returns:
        Dict[str, int]: A newly built dictionary keyed by single-character
        IUPAC code with integer encodings as values.
    """
    # Position in this tuple is the integer assigned to the code.
    ordered_codes = ("A", "T", "G", "C", "W", "R", "M", "K", "Y", "S")
    encoding = {code: index for index, code in enumerate(ordered_codes)}

    # Missing data uses a negative sentinel rather than the next index.
    encoding["N"] = -9
    return encoding
def get_onehot_dict() -> Dict[str, List[float]]:
    """Get a dictionary of IUPAC ambiguity codes to one-hot encoded vectors.

    Each vector has four positions, in the order A, T, G, C. Unambiguous
    bases have a single 1.0; two-base ambiguity codes have 0.5 in each of
    their two positions; the missing-data code ``"N"`` is all zeros.

    Returns:
        Dict[str, List[float]]: A newly built dictionary (with fresh list
        objects on every call) keyed by single-character IUPAC code.
    """
    # NOTE: "N" previously appeared twice in this literal; the duplicate key
    # was redundant (same value) and has been removed. Key order is unchanged.
    return {
        "A": [1.0, 0.0, 0.0, 0.0],
        "T": [0.0, 1.0, 0.0, 0.0],
        "G": [0.0, 0.0, 1.0, 0.0],
        "C": [0.0, 0.0, 0.0, 1.0],
        "N": [0.0, 0.0, 0.0, 0.0],  # Missing data
        "W": [0.5, 0.5, 0.0, 0.0],  # A/T
        "R": [0.5, 0.0, 0.5, 0.0],  # A/G
        "M": [0.5, 0.0, 0.0, 0.5],  # A/C
        "K": [0.0, 0.5, 0.5, 0.0],  # G/T
        "Y": [0.0, 0.5, 0.0, 0.5],  # C/T
        "S": [0.0, 0.0, 0.5, 0.5],  # C/G
    }
def validate_input_type(
    X: Union[np.ndarray, pd.DataFrame, List[List[int]]],
    return_type: str = "array",
) -> Union[np.ndarray, pd.DataFrame, List[List[int]]]:
    """Validate the input type and return the data as the requested type.

    Checks that ``X`` is a pandas DataFrame, a numpy array, or a list, then
    converts it to ``return_type``. Every branch returns a new container that
    does not share row/array storage with ``X``, so modifying the result does
    not modify the caller's data. For list input returned as a list, the
    outer list and each directly nested list are copied; deeper nesting is
    not.

    Args:
        X (pandas.DataFrame, numpy.ndarray, or List[List[int]]): The input
            data to validate and convert.
        return_type (str, optional): The type of the returned object.
            Supported options are "df" (DataFrame), "array" (numpy array),
            and "list". Defaults to "array".

    Returns:
        pandas.DataFrame, numpy.ndarray, or List[List[int]]: The input data
        converted to the desired return type.

    Raises:
        TypeError: If ``X`` is not a pandas.DataFrame, numpy.ndarray, or
            list. This is checked before ``return_type`` is examined.
        ValueError: If an unsupported ``return_type`` is provided. Supported
            types are "df", "array", and "list".

    Example:
        >>> X = [[1, 2, 3], [4, 5, 6]]
        >>> validate_input_type(X, "df")
           0  1  2
        0  1  2  3
        1  4  5  6
    """
    if not isinstance(X, (pd.DataFrame, np.ndarray, list)):
        raise TypeError(
            f"X must be of type pandas.DataFrame, numpy.ndarray, "
            f"or List[List[int]], but got {type(X)}"
        )

    if return_type == "array":
        if isinstance(X, pd.DataFrame):
            # Without copy=True, to_numpy() may hand back a view of the
            # DataFrame's internal block.
            return X.to_numpy(copy=True)
        if isinstance(X, np.ndarray):
            return X.copy()
        return np.array(X)

    if return_type == "df":
        if isinstance(X, pd.DataFrame):
            return X.copy()
        # copy=True prevents the DataFrame from wrapping the caller's ndarray
        # buffer directly.
        return pd.DataFrame(X, copy=True)

    if return_type == "list":
        if isinstance(X, list):
            # Copy the outer list and each row so the result is independent
            # of the input, matching the other branches. Non-list elements
            # (e.g. a flat list of ints) are passed through unchanged.
            return [row.copy() if isinstance(row, list) else row for row in X]
        if isinstance(X, np.ndarray):
            return X.tolist()
        return X.values.tolist()

    raise ValueError(
        f"Unsupported return type provided: {return_type}. Supported types "
        f"are 'df', 'array', and 'list'"
    )