# Source code for ada.data_containers.generic

import numpy as np
from typing import Type, Tuple
import pprint
# import warnings
import zipfile

from ada.data_containers._base import _ActiData, _Raw, _Epoched
from ada.io.geneactiv import RawGeneActiv
from ada.io.actigraph import RawActiGraph


class GenericData(_ActiData):
    """A class for storing and handling actigraphic data, both raw and epoched.

    Generic format compatible with various manufacturers and algorithms.

    The data array is indexed channel-first: ``data[i]`` is the channel named
    ``channel_names[i]`` and the second axis runs over samples.

    Args:
        data (np.ndarray): Channel-first data array.
        metadata (dict): Metadata of the original recording. The device type
            and subject id are looked up in this dictionary.
        fs (float): Presumably the sampling frequency; it is forwarded
            unchanged to the base class and stored in the exported file.
        channel_names (list): Names of the channels, in the order of the
            first axis of ``data``.
        epoching_method_metadata (dict | None): Metadata of the epoching
            method, or None if the data was never epoched.
        scoring_method_metadata (dict | None): Metadata of the scoring
            method, or None if the data is not scored.
    """

    __slots__ = ('_original_format_metadata', '_epoching_method_metadata', '_scoring_method_metadata')

    def __init__(self,
                 data: np.ndarray,
                 metadata: dict,
                 fs: float,
                 channel_names: list,
                 epoching_method_metadata: dict | None = None,
                 scoring_method_metadata: dict | None = None):
        # Index table of the exported archive: the integer values are the
        # positions (arr_<n>) of the corresponding objects in the .ada file,
        # while 'fs' and 'channel_names' are stored inline.
        self._original_format_metadata = {
            'data': 0,
            'metadata': 1,
            'original_format_metadata': 2,
            'fs': fs,
            'channel_names': channel_names,
        }
        self._epoching_method_metadata = epoching_method_metadata
        self._scoring_method_metadata = scoring_method_metadata
        super().__init__(data, metadata, fs, channel_names)

    def __repr__(self):
        raw_text = pprint.pformat(self._metadata, indent=4)

        if self._epoching_method_metadata is None:
            epoching_text = "This data was never epoched."
        else:
            epoching_text = pprint.pformat(self._epoching_method_metadata, indent=4)

        if self._scoring_method_metadata is None:
            scoring_text = "This data is not scored."
        else:
            scoring_text = pprint.pformat(self._scoring_method_metadata, indent=4)

        return "Raw metadata:\n{}\n\nEpoching metadata:\n{}\n\nScoring metadata:\n{}".format(
            raw_text, epoching_text, scoring_text)

    def _axis_data(self, axis: str) -> np.ndarray:
        """Return the channel named ``axis``; raise ValueError if it is absent."""
        try:
            return self._data[self._channel_names.index(axis)]
        except ValueError:
            raise ValueError("This data is not 3-axial, no {} data!".format(axis))

    @property
    def timestamp(self) -> np.ndarray:
        """Data of the channel named ``"timestamp"``."""
        idx = self._channel_names.index("timestamp")
        return self._data[idx]

    @property
    def to_score(self) -> np.ndarray:
        """Data to be scored by scoring algorithms.

        For raw data this is ``|sqrt(x^2 + y^2 + z^2) - 1|``; for epoched data
        it is the channel selected by the ``"to_score channel"`` entry of the
        epoching metadata.

        Raises:
            ValueError: If the data is already scored.
        """
        if self.scoring_method_metadata is not None:
            raise ValueError("Data is already scored!")

        if self.epoching_method_metadata is not None:
            return self.data[self.epoching_method_metadata["to_score channel"]]

        # this assumes we have only 3-axial accelerometers
        x = self.data[self._channel_names.index('x')]
        y = self.data[self._channel_names.index('y')]
        z = self.data[self._channel_names.index('z')]
        return np.abs(np.sqrt(x ** 2 + y ** 2 + z ** 2) - 1)

    @property
    def epoching_method_metadata(self) -> dict | None:
        """Metadata associated with the epoching method and its parameters.

        Returns None if data is not epoched.
        """
        return self._epoching_method_metadata

    @property
    def scoring_method_metadata(self) -> dict | None:
        """Metadata associated with the scoring algorithm and its parameters.

        Returns None if data is not scored.
        """
        return self._scoring_method_metadata

    @property
    def x(self) -> np.ndarray:
        """Data from x axis, raises ValueError if data is not 3-axial."""
        return self._axis_data('x')

    @property
    def y(self) -> np.ndarray:
        """Data from y axis, raises ValueError if data is not 3-axial."""
        return self._axis_data('y')

    @property
    def z(self) -> np.ndarray:
        """Data from z axis, raises ValueError if data is not 3-axial."""
        return self._axis_data('z')

    @property
    def stationary_variance(self) -> float:
        """Variance of actigraph laying still (transducer noise).

        Raises:
            ValueError: If the data is epoched or scored.
        """
        if self.epoching_method_metadata is None and self.scoring_method_metadata is None:
            return self._get_device(self)._stationary_variance
        raise ValueError("Stationary variance is defined only for raw data.")

    @property
    def dynamic_range(self) -> float:
        """Dynamic range (in g) of the digital converter.

        Raises:
            ValueError: If the data is epoched or scored.
        """
        if self.epoching_method_metadata is None and self.scoring_method_metadata is None:
            return self._get_device(self)._dynamic_range
        # The original message was copy-pasted from stationary_variance.
        raise ValueError("Dynamic range is defined only for raw data.")

    @property
    def first_sample_timestamp(self) -> float:
        """Unix timestamp of first sample."""
        device = GenericData._get_device(self)
        return device._convert_timestamp(self.timestamp[0], self._metadata, True)

    @property
    def id(self) -> str:
        """Subject identifier read from the recording metadata."""
        return GenericData._get_id(self)

    @staticmethod
    def load_file(path: str) -> "GenericData":
        """Loading file saved in the generic format provided by this package.

        Args:
            path (str): Path to the .ada file.

        Returns:
            GenericData: Object containing data.
        """
        return GenericData(*GenericData._generic_load(path))

    def export(self, path: str):
        """Exports object data to the generic format.

        All metadata are preserved in the file.

        Args:
            path (str): Path to the .ada file.
        """
        # Work on a copy of the index table so that exporting does not leave
        # (possibly stale) archive positions on the instance.
        index_table = dict(self._original_format_metadata)
        to_export = [self._data, self._metadata, index_table]

        if self._epoching_method_metadata is not None:
            to_export.append(self._epoching_method_metadata)
            index_table['epoching_method_metadata'] = len(to_export) - 1

        if self._scoring_method_metadata is not None:
            to_export.append(self._scoring_method_metadata)
            index_table['scoring_method_metadata'] = len(to_export) - 1

        GenericData._generic_export(path, to_export)

    @staticmethod
    def _generic_load(path: str) -> Tuple[np.ndarray, dict, float, list, dict | None, dict | None]:
        """Read an exported archive and return the constructor arguments.

        Returns:
            Tuple: ``(data, metadata, fs, channel_names,
            epoching_method_metadata, scoring_method_metadata)``; the last two
            are None when absent from the file.
        """
        print('Loading data from file: {}'.format(path))

        # NOTE(security): allow_pickle=True is required to restore the
        # metadata dictionaries, but unpickling can execute arbitrary code.
        # Only load files from trusted sources.
        with np.load(path, allow_pickle=True) as files:
            data = files["arr_0"]
            metadata = files["arr_1"].item()
            original_format_metadata = files["arr_2"].item()

            def optional_entry(key: str) -> dict | None:
                # Optional sections are located through the index table.
                if key not in original_format_metadata:
                    return None
                return files["arr_{}".format(original_format_metadata[key])].item()

            epoching_method_metadata = optional_entry("epoching_method_metadata")
            scoring_method_metadata = optional_entry("scoring_method_metadata")

        print('Done loading from {}'.format(path))
        return (data,
                metadata,
                float(original_format_metadata['fs']),
                original_format_metadata['channel_names'],
                epoching_method_metadata,
                scoring_method_metadata)

    @staticmethod
    def _generic_export(path: str, to_export: list):
        """Write ``to_export`` as a compressed archive, appending '.ada' if missing."""
        print('Saving data to file: {}'.format(path))
        if not path.endswith('.ada'):
            path = path + '.ada'
        # Writing through an open file object keeps numpy from appending
        # its own '.npz' extension to the path.
        with open(path, 'wb') as f:
            np.savez_compressed(f, *to_export)
        print('Done saving to {}'.format(path))

    @staticmethod
    def from_nongeneric(acti_data: _ActiData) -> "GenericData":
        """Converts any other actigraphic data object to the generic format.

        Useful for standardization.

        Args:
            acti_data (_ActiData): Actigraphic data object, either Raw or Epoched.

        Raises:
            ValueError: If the provided object is not actigraphic data.

        Returns:
            GenericData: GenericData object containing the data from original
            object. A GenericData input is returned as is.
        """
        if isinstance(acti_data, _Raw):
            return GenericData(acti_data.data, acti_data.metadata, acti_data.fs,
                               acti_data.channel_names, None, None)
        if isinstance(acti_data, _Epoched):
            return GenericData(acti_data.data, acti_data.metadata, acti_data.fs,
                               acti_data.channel_names, acti_data.epoching_method_metadata, None)
        if isinstance(acti_data, GenericData):
            return acti_data
        raise ValueError("Object to be transformed must be actigraphic data. Use Raw or Epoched objects")

    def cut_by_samples(self, start_sample: int, end_sample: int) -> "GenericData":
        """Create new object with the data cut by given indexes.

        Args:
            start_sample (int): First sample of output data.
            end_sample (int): Sample after the last sample of output data.

        Returns:
            GenericData: Object containing the cut data.
        """
        return GenericData(self._data[:, start_sample:end_sample],
                           self._metadata,
                           self._fs,
                           self._channel_names,
                           self._epoching_method_metadata,
                           self._scoring_method_metadata)

    def cut_by_timestamp(self, start_ts: float, end_ts: float | None = None) -> "GenericData":
        """Create new object with the data cut by given timestamps.

        Args:
            start_ts (float): Unix timestamp of output data beginning.
            end_ts (float | None): Unix timestamp of output data end. If None,
                last sample of output data will be last sample of input data.

        Raises:
            IndexError: If ``start_ts`` is later than every sample.

        Returns:
            GenericData: Object containing the cut data.
        """
        data_cut = GenericData._generic_cut(self, start_ts, end_ts)
        return GenericData(data_cut,
                           self._metadata,
                           self._fs,
                           self._channel_names,
                           self._epoching_method_metadata,
                           self._scoring_method_metadata)

    @staticmethod
    def _get_device(acti_data: _ActiData) -> Type[_Raw]:
        """Pick the device class matching the recording metadata.

        Raises:
            RuntimeError: If the metadata matches no known device.
        """
        values = acti_data.metadata.values()
        if 'GENEActiv' in values:
            return RawGeneActiv
        if any('GT3X' in str(value) for value in values):
            return RawActiGraph
        raise RuntimeError("Data generated by unknown device.")

    @staticmethod
    def _get_id(acti_data: _ActiData) -> str:
        """Read the subject identifier from the recording metadata.

        Raises:
            RuntimeError: If the metadata matches no known device.
        """
        metadata = acti_data.metadata
        if 'GENEActiv' in metadata.values():
            return metadata['Subject Code']
        if 'mesa_id' in metadata:
            return str(metadata['mesa_id'])
        # str() keeps non-string metadata values from raising TypeError,
        # consistently with _get_device.
        if any('GT3X' in str(value) for value in metadata.values()):
            return metadata['subject_name']
        raise RuntimeError("Data generated by unknown device.")

    @staticmethod
    def _generic_cut(acti_data: _ActiData, start_ts: float, end_ts: float | None) -> np.ndarray:
        """Slice the data of ``acti_data`` between two Unix timestamps.

        The output starts at the first sample whose timestamp is not smaller
        than ``start_ts`` and ends just before the first sample whose
        timestamp is not smaller than ``end_ts``. With ``end_ts`` None, or
        later than every sample, the output runs to the last sample inclusive.

        Raises:
            IndexError: If ``start_ts`` is later than every sample.
        """
        device = GenericData._get_device(acti_data)
        timestamps = acti_data.timestamp

        start_ts = device._convert_timestamp(start_ts, acti_data.metadata)
        not_before_start = np.where(timestamps >= start_ts)[0]
        if not_before_start.size == 0:
            raise IndexError("start_ts is later than the last sample of the data.")
        start_sample = not_before_start[0]

        if end_ts is None:
            # Keep everything up to and including the last sample. Resolving
            # None to timestamps[-1] would make the last sample the exclusive
            # bound and silently drop it.
            end_sample = len(timestamps)
        else:
            end_ts = device._convert_timestamp(end_ts, acti_data.metadata)
            not_before_end = np.where(timestamps >= end_ts)[0]
            end_sample = not_before_end[0] if not_before_end.size else len(timestamps)

        return acti_data.data[:, start_sample:end_sample]

    @staticmethod
    def preview_metadata(path: str) -> tuple[dict, list[str]]:
        """Preview of metadata without loading the file.

        Only the index table and the metadata dictionaries are read from the
        archive; the data array is not.

        Args:
            path (str): Path to the file

        Raises:
            ValueError: Wrong format

        Returns:
            tuple[dict, list[str]]: Metadata and channel names.
        """
        # NOTE(security): allow_pickle=True can execute arbitrary code while
        # unpickling. Only preview files from trusted sources.
        try:
            with zipfile.ZipFile(path, 'r') as archive:
                with archive.open("arr_2.npy") as index_file:
                    original_format = np.load(index_file, allow_pickle=True).item()

                out_dict = {}
                for name, position in original_format.items():
                    if 'epoching' in name or 'scoring' in name or name == 'metadata':
                        with archive.open(f"arr_{position}.npy") as member:
                            out_dict[name] = np.load(member, allow_pickle=True).item()

            channel_names = original_format['channel_names']
            return out_dict, channel_names
        except Exception as err:
            # Keep the original cause attached for debugging.
            raise ValueError("Wrong format") from err