Source code for idtxl.idtxl_import

"""Import external file formats into IDTxl.

Provide functions to import the following into IDTxl:

    - mat-files (versions 4, 6, and 7 via scipy; version 7.3 via hdf5)
    - FieldTrip-style mat-files (version 7.3, hdf5)

Matlab supports hdf5 only for files saved as version 7.3 or higher:
https://au.mathworks.com/help/matlab/ref/save.html#inputarg_version

Creates a numpy array usable as input to IDTxl.

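Methods:
    import_fieldtrip(file_name, ft_struct_name, file_version, normalise)
        takes a file_name, the name of the FieldTrip structure inside the
        file (ft_struct_name), and the version of the mat-file
    import_matarray(file_name, array_name, file_version, dim_order, normalise)
        takes a file_name, the name of the array variable (array_name)
        inside, the version of the mat-file, and the order of the process
        axis, sample (time) axis, and replication axis (dim_order, given as
        a string of the characters 'p', 's', and 'r')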

Note:
    Written for Python 3.4+

Created on Wed Mar 19 12:34:36 2014

@author: Michael Wibral
"""
import h5py
import numpy as np
from scipy.io import loadmat
from idtxl.data import Data

VERBOSE = False


def import_fieldtrip(file_name, ft_struct_name, file_version, normalise=True):
    """Load a FieldTrip raw-data structure from a MATLAB file into IDTxl.

    The MATLAB structure is expected to carry the fields "trial" (data),
    "label" (channel labels), "time" (time stamps of the samples), and
    "fsample" (sampling rate). This is the standard raw-data layout of the
    MATLAB toolbox FieldTrip, which is commonly used to represent
    neurophysiological data (see also
    http://www.fieldtriptoolbox.org/reference/ft_datatype_raw).

    The file has to be a MATLAB hdf5 file (saved with '-v7.3', .mat) that
    holds a SINGLE FieldTrip data structure.

    Args:
        file_name : string
            full (matlab) file_name on disk
        ft_struct_name : string
            variable name of the MATLAB structure that is in FieldTrip
            format (autodetect will hopefully be possible later ...)
        file_version : string
            version of the file; only 'v7.3' (MATLAB's 7.3 format) is
            accepted
        normalise : bool [optional]
            normalise data after import (default=True)

    Returns:
        Data() instance
            instance of IDTxl Data object, containing data from the 'trial'
            field (created with dim_order='spr')
        list of strings
            list of channel labels, corresponding to the 'label' field
        numpy array
            time stamps for samples, corresponding to the 'time' field
        int
            sampling rate, corresponding to the 'fsample' field

    Raises:
        RuntimeError
            if file_version is anything other than 'v7.3'

    @author: Michael Wibral
    """
    version_is_supported = file_version == "v7.3"
    if not version_is_supported:
        # TODO we could write a fallback option using numpy's loadmat?
        raise RuntimeError('At present only m-files in format 7.3 are '
                           'supported, please consider reopening and resaving '
                           'your m-file in that version.')

    announcement = 'Creating Python dictionary from FT data structure: {0}'
    print(announcement.format(ft_struct_name))

    # Every helper opens the file on its own and extracts exactly one field
    # of the FieldTrip structure.
    trials = _ft_import_trial(file_name, ft_struct_name)
    channel_labels = _ft_import_label(file_name, ft_struct_name)
    sampling_rate = _ft_fsample_2_float(file_name, ft_struct_name)
    time_axis = _ft_import_time(file_name, ft_struct_name)

    data_obj = Data(data=trials, dim_order='spr', normalise=normalise)
    return data_obj, channel_labels, time_axis, sampling_rate
def _ft_import_trial(file_name, ft_struct_name):
    """Import FieldTrip trial data into Python.

    Args:
        file_name : string
            name of the MATLAB hdf5 (v7.3) file on disk
        ft_struct_name : string
            name of the FieldTrip structure inside the file

    Returns:
        numpy array
            3D array holding one 2D trial matrix per index of the last
            axis; the first two axes are taken over unchanged from the
            matrices as h5py returns them
    """
    # Open read-only and via a context manager, so that the file is never
    # modified and the handle is released even if reading fails.
    with h5py.File(file_name, 'r') as ft_file:
        ft_struct = ft_file[ft_struct_name]
        # TODO: ft_struct_name = automagic...

        # Get the trial cells that contain the references (pointers) to the
        # data we need. Then get the data from matrices in cells of a
        # 1 x numtrials cell array in the original FieldTrip structure.
        trial = ft_struct['trial']
        n_trials = trial.shape[0]

        # Allocate memory to hold actual data, read shape of first trial to
        # know the data size.
        first_trial = np.array(ft_file[trial[0][0]])
        print('Found data with first dimension: {0}, and second: {1}'
              .format(first_trial.shape[0], first_trial.shape[1]))
        trial_data = np.empty(first_trial.shape + (n_trials,))

        # Get actual data from h5py structure; the first trial has already
        # been read, so it is not fetched from disk a second time.
        trial_data[:, :, 0] = first_trial
        for tt in range(1, n_trials):
            trialref = trial[tt][0]  # get trial reference
            trial_data[:, :, tt] = np.array(ft_file[trialref])  # get data
    return trial_data


def _ft_import_label(file_name, ft_struct_name):
    """Import FieldTrip labels into Python.

    Args:
        file_name : string
            name of the MATLAB hdf5 (v7.3) file on disk
        ft_struct_name : string
            name of the FieldTrip structure inside the file

    Returns:
        list of strings
            one string per entry of the 'label' field
    """
    # for details of the data handling see comments in _ft_import_trial
    with h5py.File(file_name, 'r') as ft_file:
        ft_label = ft_file[ft_struct_name]['label']
        if VERBOSE:
            print('Converting FT labels to python list of strings')
        label = []
        for ll in range(ft_label.shape[0]):
            # There is only one item in labelref, but we have to index it.
            labelref = ft_label[ll]
            # Matlab character arrays are read as numeric character codes.
            # Flatten them first, so that every code is a plain scalar
            # (and not a one-element array) before it is passed to chr().
            char_codes = np.asarray(ft_file[labelref[0]]).ravel()
            label.append(''.join(chr(int(code)) for code in char_codes))
    return label


def _ft_import_time(file_name, ft_struct_name):
    """Import FieldTrip time stamps into Python.

    Args:
        file_name : string
            name of the MATLAB hdf5 (v7.3) file on disk
        ft_struct_name : string
            name of the FieldTrip structure inside the file

    Returns:
        numpy array
            3D array holding one 2D time axis matrix per index of the last
            axis; the first two axes are taken over unchanged from the
            matrices as h5py returns them
    """
    # for details of the data handling see comments in _ft_import_trial
    with h5py.File(file_name, 'r') as ft_file:
        ft_time = ft_file[ft_struct_name]['time']
        n_trials = ft_time.shape[0]
        if VERBOSE:
            print('Converting FT time cell array to numpy array')

        # Read the first time axis to know the size of the output array.
        first_timeaxis = np.array(ft_file[ft_time[0][0]])
        timestamps = np.empty(first_timeaxis.shape + (n_trials,))
        timestamps[:, :, 0] = first_timeaxis
        for tt in range(1, n_trials):
            timeref = ft_time[tt][0]
            timestamps[:, :, tt] = np.array(ft_file[timeref])
    return timestamps


def _ft_fsample_2_float(file_name, ft_struct_name):
    """Import the FieldTrip sampling rate into Python.

    Note that, despite the function's name, the sampling rate is returned
    as an int (the value read from the file is truncated by int()).

    Args:
        file_name : string
            name of the MATLAB hdf5 (v7.3) file on disk
        ft_struct_name : string
            name of the FieldTrip structure inside the file

    Returns:
        int
            first entry of the 'fsample' field
    """
    # for details of the data handling see comments in _ft_import_trial
    with h5py.File(file_name, 'r') as ft_file:
        ft_fsample = ft_file[ft_struct_name]['fsample']
        # Flatten before indexing such that a true scalar, and not a
        # one-element array, is converted to int.
        fsample = int(np.asarray(ft_fsample).ravel()[0])
    if VERBOSE:
        print('Converting FT fsample array (1x1) to numpy array (1x1)')
    return fsample
def import_matarray(file_name, array_name, file_version, dim_order,
                    normalise=True):
    """Read a single array from a MATLAB file into IDTxl.

    Reads a mat-file (.mat) with a SINGLE array inside and returns the
    array wrapped in an IDTxl Data object. Files of version 'v7.3' (hdf5)
    are read via h5py, older versions via scipy's loadmat. The meaning of
    the array's axes is passed on to the Data object via 'dim_order'.

    Note:
        The import function squeezes the loaded mat-file, i.e., any
        singleton dimension will be removed. Hence do not enter singleton
        dimension into the 'dim_order', e.g., don't pass dim_order='ps' but
        dim_order='s' if you want to load a 1D-array where entries
        represent samples recorded from a single channel.

    Args:
        file_name : string
            full (matlab) file_name on disk
        array_name : string
            variable name of the MATLAB array to be read
        file_version : string
            version of the file, e.g. 'v7.3' for MATLAB's 7.3 format,
            currently versions 'v4', 'v6', 'v7', and 'v7.3' are supported
        dim_order : string
            order of dimensions, accepts any combination of the characters
            'p', 's', and 'r' for processes, samples, and replications;
            must have the same length as the data dimensionality, e.g.,
            'ps' for a two-dimensional array of data from several processes
            over time
        normalise : bool [optional]
            normalise data after import (default=True)

    Returns:
        Data() instance
            instance of IDTxl Data object, containing the data from the
            array
        list of strings
            list of generic channel labels, 'channel_000', 'channel_001',
            ..., one per process in the data
        numpy array
            time stamps for samples in units of time steps, i.e., the
            integers 0, ..., n_samples - 1
        int
            sampling rate, always 1 (time is measured in samples)

    Raises:
        RuntimeError
            if the array is not found at the top level of a 'v7.3' file, or
            if a file declared as 'v4', 'v6', or 'v7' can not be read by
            scipy's loadmat because of its format
        ValueError
            if file_version is not one of the supported versions

    Created on Wed Mar 19 12:34:36 2014

    @author: Michael Wibral
    """
    if file_version == 'v7.3':
        # Open read-only and via a context manager, so that the file is
        # never modified and the handle is released in any case.
        with h5py.File(file_name, 'r') as mat_file:
            # Assert that at least one of the keys found at the top level
            # of the HDF file matches the name of the array we wanted
            if array_name not in mat_file.keys():
                raise RuntimeError(
                    'Array {0} not in mat file or not a variable '
                    'at the file\'s top level.'.format(array_name))

            # Create an object for the matlab array (from the hdf5
            # hierachy), the trailing [()] ensures everything is read
            # before the file is closed.
            mat_data = np.squeeze(np.asarray(mat_file[array_name][()]))
    elif file_version in ['v4', 'v6', 'v7']:
        try:
            m = loadmat(file_name, squeeze_me=True, variable_names=array_name)
        except NotImplementedError as err:
            # Keep the original error as the cause for easier debugging.
            raise RuntimeError('You may have provided an incorrect file '
                               'version. The mat file was probably saved as '
                               'version 7.3 (hdf5).') from err
        mat_data = m[array_name]  # loadmat returns a dict containing variables
    else:
        raise ValueError('Unkown file version: {0}.'.format(file_version))

    # Create output: IDTxl data object, list of labels, sampling info in unit
    # time steps (sampling rate of 1).
    print('Creating Data object from matlab array: {0}.'.format(array_name))
    dat = Data(mat_data, dim_order=dim_order, normalise=normalise)
    label = ['channel_{0:03d}'.format(n) for n in range(dat.n_processes)]
    fsample = 1
    timestamps = np.arange(dat.n_samples)
    return dat, label, timestamps, fsample