#!/usr/bin/env python
# -*- coding: utf-8 -*-
# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*-
# vi: set ft=python sts=4 ts=4 sw=4 et:
# pylint: disable=no-member
#
# @Author: oesteban
# @Date: 2016-01-05 11:33:39
# @Email: code@oscaresteban.es
# @Last modified by: oesteban
# @Last Modified time: 2016-08-26 10:26:15
""" Encapsulates report generation functions """
import sys
import os
import os.path as op
import collections
import glob
import json
import pandas as pd
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
import jinja2
from mriqc.interfaces.viz_utils import plot_measures, plot_all
# matplotlib.rc('figure', figsize=(11.69, 8.27)) # for DINA4 size
def _summary_measures(*regions):
    """ Builds the ``summary_<stat>_<region>`` measure names, region by region """
    return ['summary_%s_%s' % (stat, region)
            for region in regions
            for stat in ('mean', 'stdv', 'p05', 'p95')]


# Each inner list is one group of measures handed to the plotting routines
# together. Groups for the anatomical report:
STRUCTURAL_QCGROUPS = [
    ['icvs_csf', 'icvs_gm', 'icvs_wm'],
    ['rpve_csf', 'rpve_gm', 'rpve_wm'],
    ['inu_range', 'inu_med'],
    ['cnr'],
    ['efc'],
    ['fber'],
    ['cjv'],
    ['fwhm_avg', 'fwhm_x', 'fwhm_y', 'fwhm_z'],
    ['qi1', 'qi2', 'wm2max'],
    ['snr', 'snr_csf', 'snr_gm', 'snr_wm'],
    _summary_measures('bg', 'csf', 'gm', 'wm'),
]

# Groups for the functional report, first pass
FUNC_SPATIAL_QCGROUPS = [
    _summary_measures('bg'),
    _summary_measures('fg'),
    ['efc'],
    ['fber'],
    ['fwhm', 'fwhm_x', 'fwhm_y', 'fwhm_z'],
    ['gsr_x', 'gsr_y'],
    ['snr'],
]

# Groups for the functional report, second pass (one measure per group)
FUNC_TEMPORAL_QCGROUPS = [
    ['dvars'],
    ['gcor'],
    ['m_tsnr'],
    ['mean_fd'],
    ['num_fd'],
    ['outlier'],
    ['perc_fd'],
    ['quality'],
]
def workflow_report(qctype, settings=None):
    """ Creates the group report and one report per subject

    :param qctype: ``'anat'`` or ``'func'``; expanded to ``'anatomical'`` /
        ``'functional'`` to build file names and to look up the
        ``report_<qctype>`` function of this module.
    :param settings: mapping of settings. Despite the ``None`` default a
        mapping is required: it is handed to :func:`generate_csv` and queried
        with ``.get`` for ``'output_dir'`` (default: current directory) and
        ``'work_dir'`` (default: ``./tmp``).
    :return: tuple ``(group_pdf_path, [individual_pdf_paths], result)`` where
        ``result`` maps ``'group'`` and every subject id to
        ``{'success': True, 'path': <pdf>}``.
    """
    dframe, failed = generate_csv(qctype, settings)
    sub_list = sorted(pd.unique(dframe.subject_id.ravel()))  # pylint: disable=E1101

    if qctype == 'anat':
        qctype = 'anatomical'
    elif qctype == 'func':
        qctype = 'functional'

    out_dir = settings.get('output_dir', os.getcwd())
    work_dir = settings.get('work_dir', op.abspath('tmp'))
    out_file = op.join(out_dir, qctype + '_%s.pdf')

    result = {}
    func = getattr(sys.modules[__name__], 'report_' + qctype)

    # Group report: summary page followed by the plots of the QC measures
    out_sum = op.join(work_dir, 'summary_group.pdf')
    summary_cover(dframe, qctype, failed=failed, out_file=out_sum)
    qc_group = op.join(work_dir, 'qc_measures_group.pdf')
    func(dframe, out_file=qc_group)

    out_group_file = op.join(out_dir, '%s_group.pdf' % qctype)
    concat_pdf([out_sum, qc_group], out_group_file)
    result['group'] = {'success': True, 'path': out_group_file}

    # Individual reports: cover, per-scan plots found in work_dir, QC measures
    out_indiv_files = []
    for subid in sub_list:
        subdf = dframe.loc[dframe['subject_id'] == subid]

        out_sum = op.join(work_dir, '%s_summary_%s.pdf' % (qctype, subid))
        summary_cover(dframe, qctype, failed=_failed_scans(failed, subid),
                      sub_id=subid, out_file=out_sum)

        qc_ms = op.join(work_dir, '%s_measures_%s.pdf' % (qctype, subid))
        func(dframe, subject=subid, out_file=qc_ms)

        plots = [out_sum] + _scan_plots(qctype, work_dir, subid, subdf) + [qc_ms]

        sub_path = out_file % subid
        concat_pdf(plots, sub_path)
        out_indiv_files.append(sub_path)
        result[subid] = {'success': True, 'path': sub_path}

    return out_group_file, out_indiv_files, result


def _failed_scans(failed, subid):
    """ Formats the failed scans of one subject as ``'<session> (<run>)'`` """
    if not failed:
        return []
    # Only (subject, session, run)-like records can be matched and formatted.
    # Bare subject ids carry no session/run information; indexing them either
    # compares single characters or raises, so they are skipped here.
    return ['%s (%s)' % (entry[1], entry[2])
            for entry in failed
            if isinstance(entry, (list, tuple)) and len(entry) > 2 and
            subid == entry[0]]


def _scan_plots(qctype, work_dir, subid, subdf):
    """ Lists the per-scan PDFs of one subject that exist in ``work_dir`` """
    prefixes = []
    if 'anat' in qctype:
        prefixes.append('anatomical')
    if 'func' in qctype:
        prefixes.extend(['meanepi', 'tsnr', 'fd'])

    plots = []
    for sesid in sorted(pd.unique(subdf.session_id.ravel())):
        sesdf = subdf.loc[subdf['session_id'] == sesid]
        for scanid in sorted(pd.unique(sesdf.run_id.ravel())):
            for prefix in prefixes:
                fname = op.join(work_dir, '%s_%s_%s_%s.pdf' % (
                    prefix, subid, sesid, scanid))
                # Every plot is optional: keep only the files actually present
                if op.isfile(fname):
                    plots.append(fname)
    return plots
def summary_cover(dframe, qctype, failed=None, sub_id=None, out_file=None):
    """ Generates a cover page with subject information

    :param dframe: data frame with, at least, the columns ``subject_id``,
        ``session_id``, ``run_id``, ``size_{x,y,z}`` and ``spacing_{x,y,z}``;
        ``tr`` and ``size_t`` are added to the table when present.
    :param qctype: text inserted in the title of the page.
    :param failed: list exposed to the template as ``failed``
        (``None`` is treated as an empty list).
    :param sub_id: when given, only the rows of this subject are listed and
        the individual cover template is used; otherwise the group template.
    :param out_file: passed as ``output`` to ``RstToPdf.createPdf``.
    """
    from mriqc import __version__
    import datetime
    import numpy as np
    from rst2pdf.createpdf import RstToPdf
    import pkg_resources as pkgr

    # ``unicode`` on Python 2 and ``str`` on Python 3
    text_type = type(u'')

    if failed is None:
        failed = []

    newdf = dframe.copy()

    # Format the size
    # pylint: disable=E1101
    size_cols = ['size_x', 'size_y', 'size_z']
    newdf[size_cols] = newdf[size_cols].astype(np.uint16)
    newdf['size'] = newdf[size_cols].apply(
        lambda row: u'%d \u00D7 %d \u00D7 %d' % (
            row['size_x'], row['size_y'], row['size_z']), axis=1)

    # Format spacing
    spacing_cols = ['spacing_x', 'spacing_y', 'spacing_z']
    newdf[spacing_cols] = newdf[spacing_cols].astype(np.float32)
    newdf['spacing'] = newdf[spacing_cols].apply(
        lambda row: u'%.3f \u00D7 %.3f \u00D7 %.3f' % (
            row['spacing_x'], row['spacing_y'], row['spacing_z']), axis=1)

    # Columns of the parameters table and their headers
    cols = ['session_id', 'run_id', 'size', 'spacing']
    colnames = ['Session', 'Run', 'Size', 'Spacing']
    if 'tr' in newdf.columns.ravel():
        cols.append('tr')
        colnames.append('TR (sec)')
    if 'size_t' in newdf.columns.ravel():
        cols.append('size_t')
        colnames.append(r'\# Timepoints')

    if sub_id is None:
        cols.insert(0, 'subject_id')
        colnames.insert(0, 'Subject')
    else:
        newdf = newdf[newdf.subject_id.astype(text_type) == sub_id]

    # Explicit copy: the columns are overwritten below
    newdf = newdf[cols].copy()

    # Every column is as wide as its longest cell or its header
    colsizes = []
    for col, colname in zip(cols, colnames):
        newdf[col] = newdf[col].astype(text_type)
        colsize = newdf[col].map(len).max()
        colsizes.append(colsize if colsize > len(colname) else len(colname))

    colformat = u' '.join(u'{:<%d}' % c for c in colsizes)
    rowsformatted = [colformat.format(*row) for row in newdf[cols].values.tolist()]
    header = colformat.format(*colnames)
    sep = colformat.format(*['=' * c for c in colsizes])
    ptable = '\n'.join([sep, header, sep] + rowsformatted + [sep])

    title = 'MRIQC: %s MRI %s report' % (
        qctype, 'group' if sub_id is None else 'individual')

    # Substitution dictionary
    context = {
        'title': title + '\n' + '=' * len(title),
        'timestamp': datetime.datetime.now().strftime("%Y-%m-%d, %H:%M"),
        'version': __version__,
        'failed': failed,
        'imparams': ptable
    }

    if sub_id is None:
        template_name = 'cover_group.rst'
    else:
        context['sub_id'] = sub_id
        template_name = 'cover_individual.rst'

    template = ConfigGen(pkgr.resource_filename(
        'mriqc', op.join('data', 'reports', template_name)))
    RstToPdf().createPdf(
        text=template.compile(context), output=out_file)
def concat_pdf(in_files, out_file='concatenated.pdf'):
    """ Concatenate PDF list (http://stackoverflow.com/a/3444735)

    :param in_files: paths of the PDF files to join, in order.
    :param out_file: path of the PDF file to write.
    :return: ``out_file``
    """
    from PyPDF2 import PdfFileWriter, PdfFileReader

    outpdf = PdfFileWriter()
    in_handles = []
    try:
        for in_file in in_files:
            in_pdffile = open(in_file, 'rb')
            in_handles.append(in_pdffile)
            inpdf = PdfFileReader(in_pdffile)
            for pageno in range(inpdf.numPages):
                outpdf.addPage(inpdf.getPage(pageno))

        # Write once, after all pages were collected and while the input
        # streams are still open (NOTE(review): PyPDF2 is understood to read
        # page data from the source stream at write time - confirm).
        # Opening the output this late also avoids leaving a truncated file
        # behind when an input cannot be read.
        with open(out_file, 'wb') as out_pdffile:
            outpdf.write(out_pdffile)
    finally:
        for in_pdffile in in_handles:
            in_pdffile.close()

    return out_file
def _write_report(dframe, groups, sub_id=None, sc_split=False, condensed=True,
                  out_file='report.pdf'):
    """ Generates the violin plots of each qctype

    One page is written per session (or per session and run when
    ``sc_split`` is set), skipping selections with fewer than two rows.

    :param dframe: data frame with ``session_id`` and ``run_id`` columns plus
        the measures to plot.
    :param groups: list of lists of column names. Names missing from
        ``dframe`` are ignored; the argument itself is left untouched.
    :param sub_id: forwarded as ``subject`` to the plotting function and
        included in the page title.
    :param sc_split: write one page per run instead of one per session.
    :param condensed: use ``plot_all`` on the groups when true, otherwise
        ``plot_measures`` on the flat list of available measures.
    :param out_file: path of the PDF file to write.
    :return: ``out_file``
    """
    columns = set(dframe.columns)

    # Filter on copies: ``groups`` usually is one of the module-level
    # *_QCGROUPS constants and must not be modified by a single report.
    groups = [[head for head in group if head in columns] for group in groups]
    headers = [head for group in groups for head in group]

    def _add_page(report, data, subtitle):
        """ Plots ``data`` and appends the figure to ``report`` """
        title = 'QC measures ' + subtitle
        if condensed:
            fig = plot_all(data, groups, subject=sub_id, title=title)
        else:
            fig = plot_measures(data, headers, subject=sub_id, title=title)
        report.savefig(fig, dpi=300)
        fig.clf()
        # Release the figure right away instead of accumulating open figures
        plt.close(fig)

    report = PdfPages(out_file)
    try:
        for ssid in sorted(pd.unique(dframe.session_id.ravel())):
            sesdf = dframe.loc[dframe['session_id'] == ssid].copy()

            if sc_split:
                # One page per run, plotting only the rows of that run
                pages = [((ssid, scid), sesdf.loc[sesdf['run_id'] == scid])
                         for scid in pd.unique(sesdf.run_id.ravel())]
            else:
                pages = [((ssid,), sesdf)]

            for ids, data in pages:
                if len(data.index) <= 1:
                    continue
                label = '_'.join('%s' % i for i in ids)
                if sub_id is None:
                    subtitle = '(%s)' % label
                else:
                    subtitle = '(subject %s_%s)' % (sub_id, label)
                _add_page(report, data, subtitle)
    finally:
        # Finalize the PDF even if plotting failed halfway
        report.close()

    plt.close()
    return out_file
def report_anatomical(
        dframe, subject=None, sc_split=False, condensed=True,
        out_file='anatomical.pdf'):
    """ Calls the report generator on the anatomical measures """
    # All options are forwarded untouched; only the groups are fixed here
    options = dict(sub_id=subject, sc_split=sc_split,
                   condensed=condensed, out_file=out_file)
    return _write_report(dframe, STRUCTURAL_QCGROUPS, **options)
def report_functional(
        dframe, subject=None, sc_split=False, condensed=True,
        out_file='functional.pdf'):
    """ Calls the report generator on the functional measures

    Two partial reports (temporal and spatial groups) are written to a
    temporary directory and concatenated, in that order, into ``out_file``.

    :return: ``out_file``
    """
    import shutil
    from tempfile import mkdtemp

    options = dict(sub_id=subject, sc_split=sc_split, condensed=condensed)

    wdir = mkdtemp()
    try:
        # NOTE(review): the temporal pages come first, as in the previous
        # implementation (whose variable names were swapped) - confirm that
        # this is the intended page order.
        ftemporal = _write_report(
            dframe, FUNC_TEMPORAL_QCGROUPS,
            out_file=op.join(wdir, 'ftemporal.pdf'), **options)
        fspatial = _write_report(
            dframe, FUNC_SPATIAL_QCGROUPS,
            out_file=op.join(wdir, 'fspatial.pdf'), **options)
        concat_pdf([ftemporal, fspatial], out_file)
    finally:
        # The partial reports are not needed once they have been joined
        shutil.rmtree(wdir, ignore_errors=True)
    return out_file
def generate_csv(data_type, settings):
    """ Collects the individual JSON files into a data frame and a CSV file

    Reads ``<output_dir>/derivatives/<data_type>*.json``, sorts the entries
    by subject, session and run, drops duplicated scans (keeping the last)
    and writes ``<output_dir>/<data_type>MRIQC.csv`` without the
    ``mosaic_file`` column.

    :param data_type: prefix of the JSON files and of the CSV file name.
    :param settings: mapping with an ``'output_dir'`` entry.
    :return: tuple ``(dataframe, errorlist)``; ``errorlist`` holds the
        ``subject_id`` of every entry that contains an ``exec_error`` key.
    :raises RuntimeError: if no JSON file is found.
    """
    datalist = []
    errorlist = []

    jsonfiles = glob.glob(op.join(
        settings['output_dir'], 'derivatives', '%s*.json' % data_type))
    if not jsonfiles:
        raise RuntimeError(
            'No individual QC files were found in the working directory '
            '\'%s\' for the \'%s\' data type.' % (settings['output_dir'], data_type))

    for jsonfile in jsonfiles:
        dfentry = _read_and_save(jsonfile)
        if dfentry is None:
            continue
        if 'exec_error' in dfentry:
            errorlist.append(dfentry['subject_id'])
        else:
            datalist.append(dfentry)

    dataframe = pd.DataFrame(datalist)
    cols = dataframe.columns.tolist()  # pylint: disable=no-member

    # Move the identifier columns to the front; moving run, then session,
    # then subject to position 0 leaves them as subject, session, run.
    reorder = [col
               for field in ['run', 'session', 'subject']
               for col in cols if col.startswith(field)]
    for col in reorder:
        cols.remove(col)
        cols.insert(0, col)

    # Left out of the CSV file only; the returned data frame keeps it
    if 'mosaic_file' in cols:
        cols.remove('mosaic_file')

    id_cols = ['subject_id', 'session_id', 'run_id']

    # Sort the dataframe, with failsafe if pandas version is too old
    try:
        dataframe = dataframe.sort_values(by=id_cols)
    except AttributeError:
        # pylint: disable=E1101
        dataframe = dataframe.sort(columns=id_cols)

    # Drop duplicates, again with a fallback for the older keyword name
    try:
        # pylint: disable=E1101
        dataframe.drop_duplicates(id_cols, keep='last', inplace=True)
    except TypeError:
        # pylint: disable=E1101
        dataframe.drop_duplicates(id_cols, take_last=True, inplace=True)

    out_fname = op.join(settings['output_dir'], data_type + 'MRIQC.csv')
    dataframe[cols].to_csv(out_fname, index=False)
    return dataframe, errorlist
def _read_and_save(in_file):
    """ Loads the JSON file ``in_file`` and returns its contents flattened

    Nested objects are collapsed into a single-level dictionary by
    ``_flatten``. Nothing is written to disk.
    """
    with open(in_file, 'r') as jsondata:
        return _flatten(json.load(jsondata))
def _flatten(in_dict, parent_key='', sep='_'):
items = []
for k, val in list(in_dict.items()):
new_key = parent_key + sep + k if parent_key else k
if isinstance(val, collections.MutableMapping):
items.extend(_flatten(val, new_key, sep=sep).items())
else:
items.append((new_key, val))
return dict(items)
class ConfigGen(object):
    """
    Utility class for generating a config file from a jinja template.
    https://github.com/oesteban/endofday/blob/f2e79c625d648ef45b08cc1f11fd0bd84342d604/endofday/core/template.py
    """

    def __init__(self, template_str):
        """ Stores the template name and sets up the jinja environment

        :param template_str: name handed to ``get_template``; with the
            loader search path set to ``'/'`` this is expected to be the
            path of the template file.
        """
        self.template_str = template_str
        self.env = jinja2.Environment(
            loader=jinja2.FileSystemLoader(searchpath='/'),
            trim_blocks=True, lstrip_blocks=True)

    def compile(self, configs):
        """ Renders the template with the mapping ``configs``; returns the text """
        template = self.env.get_template(self.template_str)
        return template.render(configs)

    def generate_conf(self, configs, path):
        """ Renders the template with ``configs`` and writes it to ``path``

        The file is written as UTF-8 so that non-ASCII text in the rendered
        output does not depend on the interpreter's default encoding.
        """
        import io
        output = self.compile(configs)
        with io.open(path, 'w', encoding='utf-8') as output_file:
            output_file.write(output)