# Source code for ada.io.file_manager

# mypy: ignore-errors
from contextlib import redirect_stdout
import sys
import os
from typing import Callable
import csv
import numpy as np
from tqdm import tqdm

from ada.data_containers._base import _Raw, _ActiData
from ada.data_containers.generic import GenericData
from ada.data_containers.scored import ScoredShort
from ada.io.geneactiv import RawGeneActiv, GeneActivMVM
from ada.io.actigraph import RawActiGraph
from ada.io.acti_eeg import ActiPSG  # , ObciActiRaw
from ada.io.mesa import Mesa
from ada.data_containers._tag_reader import TagsFileReader
from ada.long.cosinor import CosinorResults
from ada.long.ar import ARResults
from ada.long.nonparametric import DFAResults, SimpleNonparametricResults
from ada.data_containers._long import _LongContainer

from xml.dom.minidom import parse


class FileManager:
    """A collection of methods facilitating IO operations. Recommended way to perform IO."""

    @staticmethod
    def __get_stdout(verbose):
        # Stream used as the redirect_stdout target: real stdout when verbose,
        # otherwise a sink to os.devnull so progress output is suppressed.
        # NOTE(review): the devnull handle is never closed explicitly — a small
        # leak per non-verbose call; harmless in practice, GC closes it.
        if verbose:
            return sys.stdout
        return open(os.devnull, 'w')

    @staticmethod
    def load_file(path: str, verbose: bool = True):
        """Load actigraphic data from given file. Detects format from all known to ada automatically.

        Args:
            path (str): Path to the file.
            verbose (bool, optional): Whether to print out loading progress. Defaults to True.

        Raises:
            ValueError: File extension cannot be handled properly.

        Returns:
            Object containing data from file.
        """
        with redirect_stdout(FileManager.__get_stdout(verbose)):
            extension = path.split('.')[-1].strip()
            if extension == 'ada':
                data = GenericData.load_file(path)
                # A scored file is promoted to the richer ScoredShort container.
                if data.scoring_method_metadata is not None:
                    data = ScoredShort(data.data, data.metadata, data.fs,
                                       data.epoching_method_metadata, data.scoring_method_metadata)
            elif extension == 'gt3x':
                data = RawActiGraph.load_file(path)
            elif extension == 'bin':
                # TODO now this does not work anymore for files without .bin (ear eeg)...
                data = RawGeneActiv.load_file(path)
            elif extension == 'csv':
                # Several CSV dialects share the extension; sniff the first line.
                with open(path, 'r') as f:
                    first_line = f.readline()
                # TODO Gotta change it one day so the file will not be opened
                # multiple times. But this means need to rewrite all load_file functions...
                if '---GENERIC CSV GENERATED BY ADA---' in first_line:
                    data = FileManager._read_generic_csv(path)
                elif 'GENEActiv' in first_line:
                    # MVM export and raw export share a header prefix; try MVM first.
                    try:
                        data = GeneActivMVM.load_file(path)
                    except ValueError:
                        data = RawGeneActiv.load_file(path)
                elif 'mesaid' in first_line:
                    data = Mesa.load_file(path)
                elif "Firmware" in first_line:
                    data = RawActiGraph.load_file(path)
            # elif extension == 'raw':
            #     data = ObciActiRaw.load_file(path)
            #     data = data.acti_data
            elif extension == 'zip':
                data = ActiPSG.load_file(path)
            elif extension == 'long':
                # Long-term analysis results: dispatch on a key unique to each
                # result type's fit parameters.
                temp, fit_params = _LongContainer._generic_load(path)
                if 'exponent' in fit_params.keys():
                    data = DFAResults._construct_from_load(temp, fit_params)
                elif 'amplitude' in fit_params.keys():
                    data = CosinorResults._construct_from_load(temp, fit_params)
                elif 'noise std' in fit_params.keys():
                    data = ARResults._construct_from_load(temp, fit_params)
                elif 'M10' in fit_params.keys():
                    data = SimpleNonparametricResults._construct_from_load(temp, fit_params)
            else:
                raise ValueError("Unknown file format")
        return data

    @staticmethod
    def export_generic(path: str, data: _ActiData, verbose: bool = True):
        """Export given actigraphic data object to generic format (convert if necessary).

        Args:
            path (str): Path to the output file.
            data (_ActiData): Actigraphic data to be exported.
            verbose (bool, optional): Whether to print out exporting progress. Defaults to True.
        """
        with redirect_stdout(FileManager.__get_stdout(verbose)):
            to_dump = GenericData.from_nongeneric(data)
            to_dump.export(path)
            del to_dump  # free the converted copy immediately; it can be large

    # This can mess up sample synchronization if used on already epoched data. In general ensure starts of the data are close and all should be good.
    # Use synchronize_recordings.py first, and then run this function on exported and trimmed actigraphic data if you want to epoch separately.
    # The synchronization will be either preserved, or data will be off by one sample, which in case of long epochs (eg 30s) is bad, but not problematic at all for raw data.
    @staticmethod
    def load_ActiPSG_from_tags(path_to_acti: str, path_to_tag: str, path_to_xml: str,
                               collapse_stages: bool | Callable = True, trim_to_eeg: bool = False,
                               verbose: bool = True) -> ActiPSG:
        """Load corresponding PSG tags and actigraphic data, and create synchronized ActiPSG container.

        It is highly recommended to use raw actigraphic data here, and epoch whole object using chosen
        Epocher.to_epoch() method, because synchronization of PSG and actigraphy can always be off by
        1 sample. Use synchronize_recordings.py first, and then run this function on exported and
        trimmed actigraphic data if you want to epoch separately.

        Args:
            path_to_acti (str): Path to file with actigraphic data.
            path_to_tag (str): Path to .tag with PSG tags formatted in obci format.
            path_to_xml (str): Path to .xml with metadata about PSG formatted in obci format.
            collapse_stages (bool | Callable, optional): Whether to collapse hypnogram into sleep/wake
                classification using built-in heuristic. To use different heuristic pass the
                function(str) -> int as an argument. Defaults to True.
            trim_to_eeg (bool, optional): When True, actigraphy will be trimmed to match start and end
                of PSG. When False, PSG will be extended using wake epochs to match actigraphy start
                and end. Defaults to False.
            verbose (bool, optional): Whether to print out loading progress. Defaults to True.

        Returns:
            ActiPSG: Object containing synchronized actigraphic and PSG data.
        """
        with redirect_stdout(FileManager.__get_stdout(verbose)):
            acti = FileManager.load_file(path_to_acti)
            xml_dom = parse(path_to_xml)
            # PSG start time (unix timestamp) comes from the obci XML metadata.
            psg_start_ts = float(xml_dom.getElementsByTagName("rs:firstSampleTimestamp")[0].childNodes[0].data)
            if isinstance(acti, _Raw) or acti.epoching_method_metadata is None:
                acti = acti.cut_by_timestamp(psg_start_ts, None)
                # pass
            else:
                print("If reading already epoched files make sure, that they were synchronized with PSG scorings prior to epoching. Synchronizing after epoching might cause an offset.")
            tags = TagsFileReader(path_to_tag).get_tags()
        return ActiPSG.from_continous_stages(acti, tags, psg_start_ts + tags[0]['start_timestamp'],
                                             collapse_stages, trim_to_eeg)

    @staticmethod
    def save_csv(path: str, acti_data: _ActiData, verbose: bool = True):
        """Save actigraphic data to a generic .csv, readable by FileManager and human-friendly formatted.

        Args:
            path (str): Path to the .csv output file.
            acti_data (_ActiData): Object to be exported as .csv.
            verbose (bool, optional): Whether to print out saving progress. Defaults to True.
        """
        if not os.path.basename(path).endswith('csv'):
            # Replace whatever extension was given with .csv.
            # Fix: the original concatenated 'csv' without the dot, producing e.g. 'datacsv'.
            path = '.'.join(path.split('.')[:-1]) + '.csv'
        with redirect_stdout(FileManager.__get_stdout(verbose)):
            with open(path, 'w', newline='') as f:
                print(f'Saving as a generic .csv: {path}')
                writer = csv.writer(f)
                writer.writerow(['---GENERIC CSV GENERATED BY ADA---'])
                writer.writerow(['Sampling Frequency', acti_data.fs])
                writer.writerow('')
                writer.writerow(['---METADATA---'])
                for key in acti_data.metadata.keys():
                    writer.writerow([key, acti_data.metadata[key]])
                writer.writerow('')
                writer.writerow(['---EPOCHING METADATA---'])
                if not isinstance(acti_data, _Raw) and acti_data.epoching_method_metadata is not None:
                    for key in acti_data.epoching_method_metadata.keys():
                        writer.writerow([key, acti_data.epoching_method_metadata[key]])
                writer.writerow('')
                writer.writerow(['---SCORING METADATA---'])
                if isinstance(acti_data, ScoredShort):
                    for key in acti_data.scoring_method_metadata.keys():
                        writer.writerow([key, acti_data.scoring_method_metadata[key]])
                writer.writerow('')
                writer.writerow(['---DATA---'])
                writer.writerow(acti_data.channel_names)
                # Data is stored channels-first; transpose so each CSV row is one sample.
                writer.writerows(acti_data.data.T)
                print(f'Done saving as a generic .csv: {path}')

    @staticmethod
    def _read_generic_csv(path: str) -> GenericData | ScoredShort:
        # Parse a CSV previously written by save_csv back into a data container.
        def type_heuristic(dictionary):
            # Metadata values come back as strings; guess their original types.
            for key in dictionary.keys():
                value = dictionary[key]
                if value == 'True' or value == 'False':
                    # Fix: bool('False') is True — compare against the literal instead,
                    # so booleans round-trip correctly through save_csv/_read_generic_csv.
                    dictionary[key] = (value == 'True')
                elif '.' in value:
                    try:
                        dictionary[key] = float(value)
                    except ValueError:
                        dictionary[key] = value
                else:
                    try:
                        dictionary[key] = int(value)
                    except ValueError:
                        dictionary[key] = value
            return dictionary

        with open(path, 'r', newline='') as f:
            reader = csv.reader(f)
            row = next(reader)
            assert row[0] == '---GENERIC CSV GENERATED BY ADA---', 'Wrong format!'
            fs = float(next(reader)[1])
            next(reader)  # skip blank separator row
            # status: 0 = general metadata, 1 = epoching, 2 = scoring sections.
            status = 0
            metadata = {0: {}, 1: {}, 2: {}}
            for row in reader:
                if '---EPOCHING METADATA---' in row:
                    status = 1
                elif '---SCORING METADATA---' in row:
                    status = 2
                elif '---DATA---' in row:
                    status = 3
                    break
                if len(row) > 1:
                    metadata[status][row[0]] = row[1]
            channel_names = next(reader)
            data = []
            for row in tqdm(reader, desc=f"Reading from file: {path}"):
                data.append(row)
        # Samples are rows in the file; containers expect channels-first.
        data = np.array(data, dtype=np.float64).T
        metadata[1] = type_heuristic(metadata[1]) if len(metadata[1]) > 0 else None
        metadata[2] = type_heuristic(metadata[2]) if len(metadata[2]) > 0 else None
        if metadata[2] is not None:
            return ScoredShort(data, metadata[0], fs, metadata[1], metadata[2])
        return GenericData(data, metadata[0], fs, channel_names, metadata[1], metadata[2])

    @staticmethod
    def preview_metadata(path: str):
        """Preview metadata of a file without loading whole content.

        Args:
            path (str): Path to a file.

        Raises:
            ValueError: If a file is MESA .csv or generic .csv (not supported), or format is unknown.

        Returns:
            tuple[dict, list]: Dictionary with metadata and list with channel names.
        """
        extension = path.split('.')[-1].strip()
        if extension == 'ada':
            data, ch = GenericData.preview_metadata(path)
        elif extension == 'gt3x':
            data, ch = RawActiGraph.preview_metadata(path)
        elif extension == 'bin':
            # TODO now this does not work anymore for files without .bin (ear eeg)...
            data, ch = RawGeneActiv.preview_metadata(path)
        elif extension == 'csv':
            with open(path, 'r') as f:
                first_line = f.readline()
            # TODO Gotta change it one day so the file will not be opened
            # multiple times. But this means need to rewrite all load_file functions...
            if '---GENERIC CSV GENERATED BY ADA---' in first_line:
                raise ValueError("No preview for generic CSV")
            elif 'GENEActiv' in first_line:
                try:
                    data, ch = GeneActivMVM.preview_metadata(path)
                except ValueError:
                    data, ch = RawGeneActiv.preview_metadata(path)
            elif 'mesaid' in first_line:
                raise ValueError("No preview for MESA CSV")
            elif "Firmware" in first_line:
                data, ch = RawActiGraph.preview_metadata(path)
            else:
                # Fix: previously fell through to UnboundLocalError on 'return'.
                raise ValueError("Unknown file format")
        else:
            # Fix: previously fell through to UnboundLocalError on 'return'.
            raise ValueError("Unknown file format")
        return data, ch
def main():
    """A script for batch converting actigraphic files into the .ada format. Use with -h or --help for details."""
    import argparse
    from pathlib import Path

    # Fix: add_help=False contradicted the docstring ("use with -h or --help");
    # leave argparse's default help enabled.
    parser = argparse.ArgumentParser(description="Convert given files to generic format.")
    parser.add_argument('--files', '-f', nargs='+', type=str, required=True,
                        help="Path to raw files.")
    parser.add_argument('--outdir', '-o', type=str, required=True,
                        help="Directory in which converted files will be saved.")
    namespace = parser.parse_args()

    if len(namespace.files) == 1:
        # A single argument is treated as a glob pattern relative to its directory,
        # so shells that don't expand wildcards still work.
        files_to_work = [str(e) for e in
                         Path(os.path.dirname(namespace.files[0])).glob(os.path.basename(namespace.files[0]))]
    else:
        files_to_work = namespace.files

    os.makedirs(namespace.outdir, exist_ok=True)
    for file in files_to_work:
        filename = os.path.basename(file).split('.')[0]
        outfile = os.path.join(namespace.outdir, filename + '.ada')
        if os.path.exists(outfile):
            print('File already exists, skipping', outfile)
            continue
        acti = FileManager.load_file(file)
        FileManager.export_generic(outfile, acti)
        del acti  # release potentially large data before loading the next file
# Allow running this module directly as a conversion script.
if __name__ == '__main__':
    main()