Source code for mriqc.utils.misc

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*-
# vi: set ft=python sts=4 ts=4 sw=4 et:
""" Helper functions """

[docs]def bids_getfile(bids_dir, data_type, subject_id, session_id=None, run_id=None): """ A simple function to select files from a BIDS structure Example:: >>> from mriqc.data import get_ds003_downsampled >>> bids_getfile(get_ds003_downsampled(), 'anat', '05') #doctest: +ELLIPSIS u'...ds003_downsampled/sub-05/anat/sub-05_T1w.nii.gz' """ import os.path as op import glob if data_type == 'anat': scan_type = 'T1w' if data_type == 'func': scan_type = 'bold' out_file = op.join(bids_dir, subject_id) onesession = (session_id is None or session_id == 'single_session') onerun = (run_id is None or run_id == 'single_run') if onesession: if onerun: pattern = op.join(out_file, data_type, '%s_*%s.nii*' % (subject_id, scan_type)) else: pattern = op.join(out_file, data_type, '%s_*%s_%s.nii*' % (subject_id, run_id, scan_type)) else: if onerun: pattern = op.join(out_file, session_id, data_type, '%s_%s_*%s.nii*' % (subject_id, session_id, scan_type)) else: pattern = op.join(out_file, session_id, data_type, '%s_%s*_%s_%s.nii*' % (subject_id, session_id, run_id, scan_type)) results = glob.glob(pattern) if not results: raise RuntimeError( 'No file found with this pattern: "%s", finding ' 'BIDS dataset coordinates are (%s, %s, %s)' % (pattern, subject_id, session_id, run_id)) return results[0]
[docs]def bids_scan_file_walker(dataset=".", include_types=None, warn_no_files=False): """ Traverse a BIDS dataset and provide a generator interface to the imaging files contained within. :author: @chrisfilo https://github.com/preprocessed-connectomes-project/quality-assessment-prot\ ocol/blob/master/scripts/qap_bids_data_sublist_generator.py :param str dataset: path to the BIDS dataset folder. :param list(str) include_types: a list of the scan types (i.e. subfolder names) to include in the results. Can be any combination of "func", "anat", "fmap", "dwi". :param bool warn_no_files: issue a warning if no imaging files are found for a subject or a session. :return: a list containing, for each .nii or .nii.gz file found, the BIDS identifying tokens and their values. If a file doesn't have an identifying token its key will be None. """ import os import os.path as op from glob import glob from warnings import warn def _no_files_warning(folder): if not warn_no_files: return warn("No files of requested type(s) found in scan folder: %s" % folder, RuntimeWarning, stacklevel=1) def _walk_dir_for_prefix(target_dir, prefix): return [x for x in next(os.walk(target_dir))[1] if x.startswith(prefix)] def _tokenize_bids_scan_name(scanfile): scan_basename = op.splitext(op.split(scanfile)[1])[0] # .nii.gz will have .nii leftover scan_basename = scan_basename.replace(".nii", "") file_bits = scan_basename.split('_') # BIDS with non ses-* subfolders given default # "single_session" ses. file_tokens = {'scanfile': scanfile, 'sub': None, 'ses': 'single_session', 'acq': None, 'rec': None, 'run': None, 'task': None, 'modality': file_bits[-1]} for bit in file_bits: for key in file_tokens.keys(): if bit.startswith(key): file_tokens[key] = bit return file_tokens ######### if include_types is None: # include all scan types by default include_types = ['func', 'anat', 'fmap', 'dwi'] subjects = _walk_dir_for_prefix(dataset, 'sub-') if len(subjects) == 0: raise GeneratorExit("No BIDS subjects found to examine.") # for each subject folder, look for scans considering explicitly # defined sessions or the implicit "single_session" case. for subject in subjects: subj_dir = op.join(dataset, subject) sessions = _walk_dir_for_prefix(subj_dir, 'ses-') for scan_type in include_types: # seems easier to consider the case of multi-session vs. # single session separately? if len(sessions) > 0: subject_sessions = [op.join(subject, x) for x in sessions] else: subject_sessions = [subject] for session in subject_sessions: scan_files = glob(op.join( dataset, session, scan_type, '*.nii*')) if len(scan_files) == 0: _no_files_warning(session) for scan_file in scan_files: yield _tokenize_bids_scan_name(scan_file)
[docs]def gather_bids_data(dataset_folder, subject_inclusion=None, include_types=None): """ Extract data from BIDS root folder """ import os import os.path as op from six import string_types import yaml from glob import glob sub_dict = {} inclusion_list = [] if include_types is None: include_types = ['anat', 'func'] # create subject inclusion list if subject_inclusion is not None and isinstance(subject_inclusion, string_types): with open(subject_inclusion, "r") as f: inclusion_list = f.readlines() # remove any /n's inclusion_list = [s.strip() for s in inclusion_list] if subject_inclusion is not None and isinstance(subject_inclusion, list): inclusion_list = [] for s in subject_inclusion: if not s.startswith('sub-'): s = 'sub-' + s inclusion_list.append(s) sub_dict = {'anat': [], 'func': []} bids_inventory = bids_scan_file_walker(dataset_folder, include_types=include_types) for bidsfile in sorted(bids_inventory, key=lambda f: f['scanfile']): if subject_inclusion is not None: if bidsfile['sub'] not in inclusion_list: continue # implies that other anatomical modalities might be # analyzed down the road. if bidsfile['modality'] in ['T1w']: # ie, anatomical scan_key = 'single_run' if bidsfile['run'] is not None: # TODO: consider multiple acq/recs scan_key = bidsfile['run'] sub_dict['anat'].append( (bidsfile['sub'], bidsfile['ses'], scan_key)) elif bidsfile['modality'] in ['bold']: # ie, functional scan_key = bidsfile['task'] if bidsfile['acq'] is not None: scan_key += '_' + bidsfile['acq'] if bidsfile['run'] is not None: # TODO: consider multiple acq/recs scan_key += '_' + bidsfile['run'] if scan_key is None: scan_key = 'func_1' sub_dict['func'].append( (bidsfile['sub'], bidsfile['ses'], scan_key)) if len(include_types) == 1: return sub_dict[include_types[0]] return sub_dict
[docs]def reorder_csv(csv_file, out_file=None): """ Put subject, session and scan in front of csv file :param str csv_file: the input csv file :param str out_file: if provided, a new csv file is created :return: the path to the file with the columns reordered """ import pandas as pd if isinstance(csv_file, list): csv_file = csv_file[-1] if out_file is None: out_file = csv_file dataframe = pd.read_csv(csv_file) cols = dataframe.columns.tolist() # pylint: disable=no-member try: cols.remove('Unnamed: 0') except ValueError: # The column does not exist pass for col in ['scan', 'session', 'subject']: cols.remove(col) cols.insert(0, col) dataframe[cols].to_csv(out_file) return out_file
[docs]def rotate_files(fname): """A function to rotate file names""" import glob import os import os.path as op name, ext = op.splitext(fname) if ext == '.gz': name, ext2 = op.splitext(fname) ext = ext2 + ext if not op.isfile(fname): return prev = glob.glob('%s.*%s' % (name, ext)) prev.insert(0, fname) prev.append('%s.%d%s' % (name, len(prev) - 1, ext)) for i in reversed(range(1, len(prev))): os.rename(prev[i-1], prev[i])
[docs]def bids_path(subid, sesid=None, runid=None, prefix=None, out_path=None, ext='json'): import os.path as op fname = '%s' % subid if prefix is not None: if not prefix.endswith('_'): prefix += '_' fname = prefix + fname if sesid is not None: fname += '_ses-%s' % sesid if runid is not None: fname += '_run-%s' % runid if out_path is not None: fname = op.join(out_path, fname) return op.abspath(fname + '.' + ext)