antsxmm

 1try:
 2    from ._version import version as __version__
 3except ImportError:
 4    from importlib.metadata import version, PackageNotFoundError
 5    try:
 6        __version__ = version("antsxmm")
 7    except PackageNotFoundError:
 8        __version__ = "0.0.0-unknown"
 9
10from .bids import parse_antsxbids_layout
11from .core import process_session
12from .pipeline import run_study
13from .core import bind_mm_rows
14from .core import check_modality_order
15from .core import build_wide_table_from_mmwide
16
17__all__ = ['parse_antsxbids_layout', 'build_wide_table_from_mmwide', 'bind_mm_rows',  'process_session', 'run_study', '__version__']
def parse_antsxbids_layout(bids_root):
    """
    Walk a BIDS tree produced by antsxbids and return one row per session.

    Parameters
    ----------
    bids_root : str or Path
        Root of the BIDS directory (contains ``sub-*`` folders).

    Returns
    -------
    pandas.DataFrame
        One row per session that has a T1w image. Columns: 'subjectID',
        'date' (the session folder name), 'session_path', single-file
        modality paths ('t1_filename', 'flair_filename', 'perf_filename',
        'pet3d_filename' — present only when found) and list-valued
        modality columns ('dti_filenames', 'rsf_filenames',
        'nm_filenames' — always present, possibly empty).

    Raises
    ------
    FileNotFoundError
        If ``bids_root`` does not exist.
    """
    root = Path(bids_root)
    if not root.exists():
        raise FileNotFoundError(f"Directory not found: {bids_root}")

    rows = []

    # Glob-based matching ('sub*' / 'ses*') for robustness to naming variants.
    for subject_dir in sorted(p for p in root.glob("sub*") if p.is_dir()):
        for session_dir in sorted(p for p in subject_dir.glob("ses*") if p.is_dir()):
            # Folder names serve directly as IDs (e.g. 'sub-211239', 'ses-20230405').
            row = {
                'subjectID': subject_dir.name,
                'date': session_dir.name,
                'session_path': str(session_dir),
            }

            # 1. Anatomy: first T1w / FLAIR in sorted order; key set only when found.
            anat_dir = session_dir / 'anat'
            if anat_dir.exists():
                for key, pattern in (('t1_filename', "*T1w.nii.gz"),
                                     ('flair_filename', "*FLAIR.nii.gz")):
                    hits = sorted(anat_dir.glob(pattern))
                    if hits:
                        row[key] = str(hits[0])

            def _collect(subdir, pattern):
                # All matches in a modality sub-folder as sorted path strings.
                folder = session_dir / subdir
                if not folder.exists():
                    return []
                return sorted(str(p) for p in folder.glob(pattern))

            # 2-4. Multi-file modalities: DWI, functional, neuromelanin.
            row['dti_filenames'] = _collect('dwi', "*.nii.gz")
            row['rsf_filenames'] = _collect('func', "*bold.nii.gz")
            row['nm_filenames'] = _collect('melanin', "*NM.nii.gz")

            # 5-6. Perfusion (ASL) and PET: first available file only.
            for key, subdir in (('perf_filename', 'perf'), ('pet3d_filename', 'pet')):
                folder = session_dir / subdir
                if folder.exists():
                    hits = sorted(folder.glob("*.nii.gz"))
                    if hits:
                        row[key] = str(hits[0])

            # A session without a T1w cannot be processed downstream; skip it.
            if 't1_filename' in row:
                rows.append(row)

    return pd.DataFrame(rows)

Parses a BIDS directory generated by antsxbids. Returns a pandas DataFrame where each row is a unique Session.

def build_wide_table_from_mmwide(root_dir, pattern="**/*_mmwide.csv", sep="_", verbose=True):
    """
    Collect every *_mmwide.csv under ``root_dir`` and merge them into one
    wide table with modality-prefixed columns.

    Parameters
    ----------
    root_dir : str or Path
        Directory searched recursively for mmwide CSV outputs.
    pattern : str
        Glob pattern passed to ``Path.rglob``.
    sep : str
        Separator between the modality prefix and the original column
        name (e.g. 'T1Hier' + sep + 'thickness').
    verbose : bool
        Print discovered files, skips, and column drops.

    Returns
    -------
    pandas.DataFrame
        One row per subject/session key ('subject_id'), columns grouped by
        modality in canonical order; empty DataFrame if nothing loaded.
    """
    root = Path(root_dir).expanduser().resolve()
    # NOTE(review): rglob already prepends '**/', so with the default pattern
    # the search is doubly recursive — harmless, but confirm intent.
    csv_files = sorted(root.rglob(pattern))

    if verbose:
        print(f"\nFound {len(csv_files)} *_mmwide.csv files")
        for f in csv_files:
            print(" ->", f.relative_to(root))

    # Path/filename clue -> canonical modality prefix.
    MODALITY_MAP = {
        "T1wHierarchical": "T1Hier",
        "T1Hier":     "T1Hier",
        "T1w":        "T1w",
        "NM2DMT":     "NM2DMT",
        "NM":         "NM2DMT",
        "DTI":        "DTI",
        "rsfMRI":     "rsfMRI",
        "T2Flair":    "T2Flair",
        "FLAIR":      "T2Flair",
        "perf":       "perf",
        "pet3d":      "pet3d",
        "PET":        "pet3d",
    }

    # Canonical column-group ordering of the final table.
    MODALITY_ORDER = ["T1Hier", "T1w", "DTI", "rsfMRI", "T2Flair", "NM2DMT", "perf", "pet3d"]

    raw_data = []

    for csv_path in csv_files:
        # Derive subject/session key from BIDS-style path components.
        sub = next((p for p in csv_path.parts if p.startswith("sub-")), None)
        ses = next((p for p in csv_path.parts if p.startswith("ses-")), None)
        subject_key = f"{sub}_{ses}" if sub and ses else "UNKNOWN"

        # Longest-clue-wins modality detection ('T1wHierarchical' beats 'T1w').
        matched_prefix = None
        best_len = 0
        for clue, prefix in MODALITY_MAP.items():
            if any(clue in part for part in csv_path.parts) or clue in csv_path.name:
                if len(clue) > best_len:
                    matched_prefix = prefix
                    best_len = len(clue)

        if not matched_prefix:
            if verbose:
                print(f" SKIP: unknown modality for file {csv_path.name}")
            continue

        if verbose:
            print(f"\nProcessing: {matched_prefix.ljust(10)} | {csv_path.name}")

        df = pd.read_csv(csv_path)

        # Hierarchical ID columns are bookkeeping, not measurements.
        drop_cols = [c for c in df.columns if "hier_id" in c.lower()]
        if drop_cols:
            df = df.drop(columns=drop_cols)
            if verbose:
                print(f" Dropped columns: {drop_cols}")

        # Multi-row CSVs collapse to their final row.
        if len(df) > 1:
            if verbose:
                print(f" Collapsing {len(df)} rows to last")
            df = df.iloc[[-1]].copy()

        df.insert(0, "bids_subject", subject_key)

        raw_data.append((matched_prefix, df))

    if not raw_data:
        if verbose: print("No valid *_mmwide.csv files were loaded!")
        return pd.DataFrame()

    # Reorder so known modalities follow MODALITY_ORDER; unknowns appended.
    # NOTE(review): the 'break' keeps only the FIRST dataframe per known
    # modality, so additional CSVs of the same modality are silently dropped
    # — confirm that exactly one mmwide CSV per modality exists upstream.
    ordered_data = []
    seen_mods = set()

    for mod in MODALITY_ORDER:
        for m, d in raw_data:
            if m == mod:
                ordered_data.append((m, d))
                seen_mods.add(m)
                break

    for m, d in raw_data:
        if m not in seen_mods:
            ordered_data.append((m, d))

    check_modality_order(ordered_data, MODALITY_ORDER)

    # T1Hier columns take precedence: strip duplicates from other modalities.
    t1hier_df = next((df for mod, df in ordered_data if mod == "T1Hier"), None)
    t1hier_raw_cols = set()
    if t1hier_df is not None:
        t1hier_raw_cols = set(t1hier_df.columns) - {"bids_subject"}

    processed_data = []
    for mod, df in ordered_data:
        if mod != "T1Hier" and t1hier_raw_cols:
            overlap = t1hier_raw_cols & set(df.columns)
            if overlap and verbose:
                print(f" Excluding {len(overlap)} overlapping columns from {mod}")
            df = df.drop(columns=overlap, errors='ignore')
        processed_data.append((mod, df))

    wide = bind_mm_rows(processed_data, sep=sep)

    # bind_mm_rows restores the ID column under its original name.
    if "bids_subject" in wide.columns:
        wide = wide.drop_duplicates(subset="bids_subject", keep="last")
        wide = wide.rename(columns={"bids_subject": "subject_id"})

    # Final layout: subject_id, then each modality group sorted internally,
    # then any leftover columns sorted alphabetically.
    ordered_cols = ["subject_id"]
    for mod_prefix in MODALITY_ORDER:
        mod_cols = [c for c in wide.columns if c.startswith(mod_prefix + sep)]
        mod_cols.sort()
        ordered_cols.extend(mod_cols)

    remaining = [c for c in wide.columns if c not in ordered_cols]
    if remaining:
        remaining.sort()
        ordered_cols.extend(remaining)

    wide = wide[ordered_cols]
    return wide
def bind_mm_rows(named_dataframes, sep="_"):
    """
    Horizontally merge per-modality tables on their first (ID) column.

    Each element of ``named_dataframes`` is a ``(modality_name, DataFrame)``
    pair. Every column except the ID column is prefixed with
    ``modality_name + sep``; empty strings and the literal "NA" become
    missing values; rows sharing an ID collapse to their last non-null
    values. Columns come back sorted, with the ID column restored first.
    """
    if not named_dataframes:
        return pd.DataFrame()

    frames = []
    for modality, frame in named_dataframes:
        table = frame.copy().replace(["", "NA"], pd.NA)

        # The first column acts as the join key.
        table = table.set_index(table.columns[0])

        # Collapse duplicate IDs, keeping the most recent non-null values.
        if len(table) > 1:
            table = table.groupby(level=0).last()

        prefixed = {col: modality + sep + col for col in table.columns}
        frames.append(table.rename(columns=prefixed))

    merged = pd.concat(frames, axis=1, join="outer")
    # Drop duplicate column names (first occurrence wins), then sort columns.
    merged = merged.loc[:, ~merged.columns.duplicated(keep="first")]
    merged = merged.reindex(sorted(merged.columns), axis=1)
    # reset_index restores the original ID column name; the rename only fires
    # in the unusual case where that name was literally "index".
    return merged.reset_index().rename(columns={"index": "subject_id"})
def process_session(session_data, output_root, project_id="ANTsX",
                    denoise_dti=True, dti_moco='SyN', separator='_', verbose=True,
                    build_wide_table=True):
    """
    Runs the full ANTsPyMM pipeline on one session.

    Stages the session's raw images into a temporary NRG-style directory,
    builds the study dataframe with ``antspymm.generate_mm_dataframe``,
    runs ``antspymm.mm_csv``, and (optionally) merges the resulting
    *_mmwide.csv outputs into a single wide table written next to the
    T1wHierarchical outputs.

    Parameters
    ----------
    session_data : mapping
        One row from parse_antsxbids_layout; must contain 'subjectID',
        'date' and 't1_filename'. Optional keys: 'flair_filename',
        'rsf_filenames', 'dti_filenames', 'nm_filenames',
        'perf_filename', 'pet3d_filename'.
    output_root : str
        Root directory for ANTsPyMM outputs (project/subject/date layout).
    project_id : str
        Project label used in staged filenames and output paths.
    denoise_dti : bool
        Forwarded to mm_csv(dti_denoise=...).
    dti_moco : str
        DTI motion-correction transform, forwarded to mm_csv.
    separator : str
        Filename separator (mysep) used by antspymm.
    verbose : bool
        Print progress details.
    build_wide_table : bool
        If True, build and persist the merged wide table on success.

    Returns
    -------
    dict
        {'success': bool, 'wide_df': DataFrame or None,
         'session_dir': str or None}. Exceptions during processing are
        caught, printed, and reported via result['success'] = False.
    """
    result = {
        'success': False,
        'wide_df': None,
        'session_dir': None
    }

    # 1. Setup paths
    sub_id = session_data['subjectID']
    date_id = session_data['date']

    # 2. Extract run ID (image_uid) from the T1 filename
    t1_fn = session_data['t1_filename']
    image_uid = extract_image_id(t1_fn)

    # 3. Setup Staging Area (Root of our temp NRG structure).
    # Remove any leftover staging dir from a previous run of this session.
    staging_root = os.path.join(tempfile.gettempdir(), f"antsxmm_staging_{sub_id}_{date_id}")
    if os.path.exists(staging_root):
        shutil.rmtree(staging_root)
    os.makedirs(staging_root, exist_ok=True)

    # 4. Stage Files
    # T1w
    t1_path, _, _ = sanitize_and_stage_file(t1_fn, project_id, sub_id, date_id, "T1w", image_uid, separator, staging_root, verbose)

    # FLAIR
    flair_raw = session_data.get('flair_filename', None)
    flair_path, _, _ = sanitize_and_stage_file(flair_raw, project_id, sub_id, date_id, "T2Flair", image_uid, separator, staging_root, verbose)
    # NOTE(review): '_' here is the throwaway third value from the unpack
    # above, so flair_info[1] carries that leftover — looks unintentional;
    # confirm what print_expected_tree expects in the second slot.
    flair_info = (flair_path, _)

    # rsfMRI (Handle List & Variants)
    rsf_raw = session_data.get('rsf_filenames', [])
    rsf_infos = []
    rsf_paths = []
    for f in rsf_raw:
        path, mod, folder_id = sanitize_and_stage_file(f, project_id, sub_id, date_id, "rsfMRI", image_uid, separator, staging_root, verbose)
        if path:
            rsf_infos.append((path, mod, folder_id))
            rsf_paths.append(path)

    # DTI (Handle List & Variants)
    dti_raw = session_data.get('dti_filenames', [])
    dti_infos = []
    dti_paths = []
    for f in dti_raw:
        path, mod, folder_id = sanitize_and_stage_file(f, project_id, sub_id, date_id, "DTI", image_uid, separator, staging_root, verbose)
        if path:
            dti_infos.append((path, mod, folder_id))
            dti_paths.append(path)

    # NM (Keep original 'rXXXX' IDs, NM logic handles string IDs).
    # A sentinel "000" from extract_image_id falls back to the T1's UID.
    nm_raw = session_data.get('nm_filenames', [])
    nm_infos = []
    nm_paths = []
    for f in nm_raw:
        rid = extract_image_id(f)
        if rid == "000": rid = image_uid
        path, mod, folder_id = sanitize_and_stage_file(f, project_id, sub_id, date_id, "NM2DMT", rid, separator, staging_root, verbose)
        if path:
            nm_infos.append((path, mod, folder_id))
            nm_paths.append(path)

    # Perf
    perf_raw = session_data.get('perf_filename', None)
    perf_path, _, _ = sanitize_and_stage_file(perf_raw, project_id, sub_id, date_id, "perf", image_uid, separator, staging_root, verbose)
    # NOTE(review): same stale-'_' pattern as flair_info above.
    perf_info = (perf_path, _)

    # PET
    pet_raw = session_data.get('pet3d_filename', None)
    pet_path, _, _ = sanitize_and_stage_file(pet_raw, project_id, sub_id, date_id, "pet3d", image_uid, separator, staging_root, verbose)
    # NOTE(review): same stale-'_' pattern as flair_info above.
    pet_info = (pet_path, _)

    # The staging root acts as the "source image directory" antspymm reads.
    mock_source_dir = staging_root

    try:
        # Pre-execution check: show the NRG tree we expect antspymm to create.
        if verbose:
            print_expected_tree(output_root, project_id, sub_id, date_id, image_uid, 
                                flair_info, rsf_infos, dti_infos, nm_infos, perf_info, pet_info, separator)

        if verbose:
            print(f"\n{'='*80}")
            print(f"Processing: {sub_id} | {date_id}")
            print(f"Image UID: {image_uid}")

        # Run antspymm preprocessing
        study_csv = antspymm.generate_mm_dataframe(
            projectID=project_id,
            subjectID=sub_id,
            date=date_id,
            imageUniqueID=image_uid,
            modality='T1w',
            source_image_directory=mock_source_dir,
            output_image_directory=output_root,
            t1_filename=t1_path,
            flair_filename=flair_path,
            rsf_filenames=rsf_paths,
            dti_filenames=dti_paths,
            nm_filenames=nm_paths,
            perf_filename=perf_path,
            pet3d_filename=pet_path
        )
        
        # Override IDs for single-file modalities to ensure alignment
        if 'flairid' in study_csv.columns and flair_path: 
            study_csv['flairid'] = image_uid
        
        # For multi-file modalities (rsfMRI, DTI), we do NOT override IDs.
        # generate_mm_dataframe maps the input list of file paths to columns rsfid1, rsfid2...
        # These columns hold the FULL PATH to the file.
        # antspymm's docsamson uses these columns to check file existence.
        # Since we passed valid staged paths, they will exist and processing will proceed.

        if perf_path and 'perfid' in study_csv.columns: 
            study_csv['perfid'] = image_uid
        if pet_path and 'pet3did' in study_csv.columns: 
            study_csv['pet3did'] = image_uid

        # Drop all-NaN columns (absent modalities) before running mm_csv.
        study_csv_clean = study_csv.dropna(axis=1)

        # Build the PPMI normalization template (brain-masked and cropped);
        # any failure falls back to template=None (antspymm default).
        try:
            template_path = antspymm.get_data("PPMI_template0", target_extension=".nii.gz")
            mask_path = antspymm.get_data("PPMI_template0_brainmask", target_extension=".nii.gz")
            if not template_path or not mask_path:
                template = None
            else:
                template = ants.image_read(template_path)
                template_mask = ants.image_read(mask_path)
                template = template * template_mask
                template = ants.crop_image(template, ants.iMath(template_mask, "MD", 12))
        except:
            template = None
            if verbose:
                print("Warning: Using default template (None)")

        if verbose:
            print("Running antspymm.mm_csv()...")

        antspymm.mm_csv(
            study_csv_clean,
            mysep=separator,
            dti_motion_correct=dti_moco,
            dti_denoise=denoise_dti,
            normalization_template=template,
            normalization_template_output='ppmi',
            normalization_template_transform_type='antsRegistrationSyNQuickRepro[s]',
            normalization_template_spacing=[1,1,1],
            srmodel_T1=None, srmodel_NM=None, srmodel_DTI=None
        )

        result['success'] = True
        result['session_dir'] = os.path.join(output_root, project_id, sub_id, date_id)

        # Optionally merge the per-modality mmwide CSVs into one wide table
        # and persist it inside the T1wHierarchical output folder.
        if build_wide_table:
            session_output_dir = result['session_dir']
            if os.path.exists(session_output_dir):
                try:
                    wide_df = build_wide_table_from_mmwide(
                        root_dir=session_output_dir,
                        sep=separator,
                        verbose=verbose
                    )
                    result['wide_df'] = wide_df
                    
                    t1_hier_dir = os.path.join(session_output_dir, "T1wHierarchical", image_uid)
                    if os.path.exists(t1_hier_dir):
                        filename = f"{project_id}{separator}{sub_id}{separator}{date_id}{separator}T1wHierarchical{separator}{image_uid}{separator}mmwide_merged.csv"
                        out_path = os.path.join(t1_hier_dir, filename)
                        wide_df.to_csv(out_path, index=False)
                        if verbose:
                            print(f"[SUCCESS] Session merged wide table written to:\n  {out_path}")
                    else:
                        if verbose:
                            print(f"[WARNING] T1wHierarchical directory not found: {t1_hier_dir}")

                except Exception as e:
                    # Wide-table failure is non-fatal: the session still
                    # counts as processed.
                    if verbose:
                        print("Warning: Failed to build wide table:", e)
                    result['wide_df'] = None

        # Returning from inside the try still runs the finally-cleanup below.
        return result

    except Exception as e:
        print("Error processing {} {}: {}".format(sub_id, date_id, str(e)))
        traceback.print_exc()
        return result
    finally:
        # Always remove the temporary staging tree, success or failure.
        if os.path.exists(staging_root):
            shutil.rmtree(staging_root)

Runs the full ANTsPyMM pipeline on one session.

def run_study(bids_dir, output_dir, project, denoise_dti=True, 
              participant_label=None, session_label=None, separator='+'):
    """
    Process every session found in a BIDS tree with the ANTsPyMM pipeline.

    Parameters
    ----------
    bids_dir : str
        Root of the antsxbids-generated BIDS directory.
    output_dir : str
        Output root passed to process_session (created if missing).
    project : str
        Project label used in output paths and staged filenames.
    denoise_dti : bool
        Forwarded to process_session.
    participant_label : str or None
        If given, only process this subject (full folder name, e.g. 'sub-01').
    session_label : str or None
        If given, only process this session (full folder name, e.g. 'ses-01').
    separator : str
        Filename separator forwarded to process_session.

    Returns
    -------
    pandas.DataFrame or None
        Concatenation of the per-session wide tables, or None when no
        session produced one (including when nothing matched the filters).
    """
    print(f"Parsing BIDS layout from: {bids_dir}")
    layout_df = parse_antsxbids_layout(bids_dir)

    # Filter for specific participant if requested
    if participant_label:
        layout_df = layout_df[layout_df['subjectID'] == participant_label]
        print(f"Filtering for subject: {participant_label}")

    # Filter for specific session if requested
    if session_label:
        layout_df = layout_df[layout_df['date'] == session_label]
        print(f"Filtering for session: {session_label}")

    if layout_df.empty:
        print("No valid subjects/sessions found.")
        return None

    print(f"Found {len(layout_df)} unique sessions to process.")
    os.makedirs(output_dir, exist_ok=True)

    failures = []
    wide_tables = []

    for idx, row in tqdm(layout_df.iterrows(), total=layout_df.shape[0]):
        result = process_session(
            row, 
            output_root=output_dir, 
            project_id=project,
            denoise_dti=denoise_dti,
            dti_moco='SyN',
            separator=separator,
            build_wide_table=True
        )

        if result['success']:
            if result['wide_df'] is not None and not result['wide_df'].empty:
                wide_tables.append(result['wide_df'])
        else:
            failures.append(f"{row['subjectID']}_{row['date']}")

    if failures:
        print(f"Finished with {len(failures)} errors: {failures}")
    else:
        print("Processing complete successfully.")

    # BUG FIX: wide_tables was accumulated but never used. Return the
    # study-level concatenation so callers can inspect or persist it
    # (previously the function always returned None, so this is
    # backward-compatible).
    if wide_tables:
        import pandas as pd  # local import: this chunk shows no module-level pandas import
        return pd.concat(wide_tables, ignore_index=True)
    return None
# NOTE(review): hard-coded version fallback; the package also derives
# __version__ dynamically via importlib.metadata (see the try/except at the
# top of this file) — confirm which assignment should win at release time.
__version__ = '0.8.1.dev1'