Source code for Medfl.NetManager.dataset

import pandas as pd
from sqlalchemy import text

from scripts.base import my_eng
from .net_helper import *
from .net_manager_queries import (DELETE_DATASET, INSERT_DATASET,
                                  SELECT_ALL_DATASET_NAMES)


class DataSet:
    """A named CSV dataset that can be written to and removed from the 'DataSets' table."""

    def __init__(self, name: str, path: str, engine=None):
        """
        Initialize a DataSet object.

        :param name: The name of the dataset.
        :type name: str
        :param path: The file path of the dataset CSV file.
        :type path: str
        :param engine: Object whose ``execute`` method runs the SQL statements.
            When ``None``, the module-level ``my_eng`` is used.
        """
        self.name = name
        self.path = path
        self.engine = engine if engine is not None else my_eng

    def validate(self) -> None:
        """
        Validate name and path attributes.

        :raises TypeError: If name or path is not a string.
        """
        if not isinstance(self.name, str):
            raise TypeError("name argument must be a string")
        if not isinstance(self.path, str):
            raise TypeError("path argument must be a string")

    def upload_dataset(self, NodeId=-1):
        """
        Upload the dataset to the database, one INSERT per CSV row.

        :param NodeId: The NodeId associated with the dataset.
        :type NodeId: int

        Notes:
            - Assumes the file at self.path is a valid CSV file.
            - The dataset is uploaded to the 'DataSets' table in the database.
            - The dataset name is sent as a bound parameter, so names that
              contain quotes are stored verbatim instead of breaking the SQL.
        """
        data_df = pd.read_csv(self.path)
        # Column names are taken from the raw CSV, before process_eicu runs.
        # NOTE(review): this assumes process_eicu keeps those columns - confirm.
        columns = data_df.columns.tolist()
        data_df = process_eicu(data_df)

        # The column list is identical for every row, so build it once.
        # Identifiers cannot be bound as parameters; they come from the CSV header.
        column_list = ",".join(["DataSetName", "nodeId", *columns])
        insert_head = f"INSERT INTO DataSets({column_list})"
        params = {"name": self.name}

        for _, row in data_df.iterrows():
            # Row values are still rendered by the is_str helper (defined
            # outside this block); presumably it quotes string-typed cells -
            # TODO confirm, and consider binding these values as well.
            rendered = [f"{is_str(data_df, row, x)}" for x in columns]
            values = ",".join([":name", f"{NodeId}", *rendered])
            query = f"{insert_head} VALUES ({values})"
            self.engine.execute(text(query), params)

    def delete_dataset(self):
        """
        Delete the dataset from the database.

        Executes the DELETE_DATASET query with this dataset's name bound to
        the ``name`` parameter.

        Notes:
            - Assumes the dataset name is unique in the 'DataSets' table.
        """
        self.engine.execute(text(DELETE_DATASET), {"name": self.name})

    def update_data(self):
        """
        Update the data in the dataset.

        Not implemented yet; calling it does nothing.
        """

    @staticmethod
    def list_alldatasets(engine):
        """
        List all dataset names from the 'DataSets' table.

        :param engine: Database connectable handed to ``pd.read_sql``.
        :returns: A DataFrame containing the names of all datasets in the
            'DataSets' table.
        :rtype: pd.DataFrame
        """
        return pd.read_sql(text(SELECT_ALL_DATASET_NAMES), engine)