antsxmm
# Resolve the package version: prefer the generated ``_version`` module
# (presumably written at build time — TODO confirm build backend), then the
# installed distribution metadata, then a sentinel for source checkouts.
try:
    from ._version import version as __version__
except ImportError:
    from importlib.metadata import version, PackageNotFoundError
    try:
        __version__ = version("antsxmm")
    except PackageNotFoundError:
        # Not installed (e.g. running straight from a source tree).
        __version__ = "0.0.0-unknown"

# Public API re-exports.
from .bids import parse_antsxbids_layout
from .core import process_session
from .pipeline import run_study
from .core import bind_mm_rows
from .core import check_modality_order
from .core import build_wide_table_from_mmwide

# Explicit public surface of the package.
__all__ = ['parse_antsxbids_layout', 'build_wide_table_from_mmwide', 'bind_mm_rows', 'process_session', 'run_study', '__version__']
def parse_antsxbids_layout(bids_root):
    """
    Parse a BIDS directory generated by antsxbids into a session table.

    Parameters
    ----------
    bids_root : str or Path
        Root of the BIDS tree containing ``sub-*`` directories.

    Returns
    -------
    pandas.DataFrame
        One row per session that contains a T1w image. Columns include
        ``subjectID``, ``date`` (the session folder name), ``session_path``,
        ``t1_filename``, optional ``flair_filename``/``perf_filename``/
        ``pet3d_filename``, and list-valued ``dti_filenames``,
        ``rsf_filenames``, ``nm_filenames`` (empty lists when absent).

    Raises
    ------
    FileNotFoundError
        If ``bids_root`` does not exist.
    """
    root = Path(bids_root)
    if not root.exists():
        raise FileNotFoundError(f"Directory not found: {bids_root}")

    rows = []

    # Glob-based discovery for robustness against layout variations.
    subject_dirs = sorted(d for d in root.glob("sub*") if d.is_dir())
    for subject_dir in subject_dirs:
        session_dirs = sorted(d for d in subject_dir.glob("ses*") if d.is_dir())
        for session_dir in session_dirs:
            # Full folder names are kept as IDs (e.g. 'sub-211239', 'ses-20230405').
            record = {
                'subjectID': subject_dir.name,
                'date': session_dir.name,
                'session_path': str(session_dir),
            }

            # Anatomy (T1w, FLAIR): keep the first match of each, if any.
            anat = session_dir / 'anat'
            if anat.exists():
                t1_hits = sorted(anat.glob("*T1w.nii.gz"))
                flair_hits = sorted(anat.glob("*FLAIR.nii.gz"))
                if t1_hits:
                    record['t1_filename'] = str(t1_hits[0])
                if flair_hits:
                    record['flair_filename'] = str(flair_hits[0])

            # Multi-file modalities: always present, possibly as empty lists.
            for key, folder, pat in (
                ('dti_filenames', 'dwi', "*.nii.gz"),
                ('rsf_filenames', 'func', "*bold.nii.gz"),
                ('nm_filenames', 'melanin', "*NM.nii.gz"),
            ):
                mod_dir = session_dir / folder
                record[key] = sorted(str(p) for p in mod_dir.glob(pat)) if mod_dir.exists() else []

            # Single-file modalities (ASL perfusion, PET): key set only when found.
            for key, folder in (('perf_filename', 'perf'), ('pet3d_filename', 'pet')):
                mod_dir = session_dir / folder
                if mod_dir.exists():
                    hits = sorted(mod_dir.glob("*.nii.gz"))
                    if hits:
                        record[key] = str(hits[0])

            # Sessions without a T1 cannot be processed downstream; skip them.
            if 't1_filename' in record:
                rows.append(record)

    return pd.DataFrame(rows)
def build_wide_table_from_mmwide(root_dir, pattern="**/*_mmwide.csv", sep="_", verbose=True):
    """
    Scan a processed-output tree for ``*_mmwide.csv`` files and merge them
    into a single one-row-per-subject wide table.

    Parameters
    ----------
    root_dir : str or Path
        Directory to search recursively.
    pattern : str
        Glob pattern for the per-modality csv files.
    sep : str
        Separator inserted between the modality prefix and column names.
    verbose : bool
        Print discovery/processing diagnostics.

    Returns
    -------
    pandas.DataFrame
        Merged wide table with modality-prefixed, ordered columns; empty
        DataFrame when nothing was loaded.
    """
    root = Path(root_dir).expanduser().resolve()
    # NOTE(review): pattern already starts with '**/' and rglob prepends its
    # own '**/' — double recursion; verify this cannot yield duplicate hits.
    csv_files = sorted(root.rglob(pattern))

    if verbose:
        print(f"\nFound {len(csv_files)} *_mmwide.csv files")
        for f in csv_files:
            print(" ->", f.relative_to(root))

    # Map filename/path "clues" to canonical modality prefixes; several clues
    # may map to the same prefix (e.g. 'NM' and 'NM2DMT').
    MODALITY_MAP = {
        "T1wHierarchical": "T1Hier",
        "T1Hier": "T1Hier",
        "T1w": "T1w",
        "NM2DMT": "NM2DMT",
        "NM": "NM2DMT",
        "DTI": "DTI",
        "rsfMRI": "rsfMRI",
        "T2Flair": "T2Flair",
        "FLAIR": "T2Flair",
        "perf": "perf",
        "pet3d": "pet3d",
        "PET": "pet3d",
    }

    # Canonical output ordering of modality column groups.
    MODALITY_ORDER = ["T1Hier", "T1w", "DTI", "rsfMRI", "T2Flair", "NM2DMT", "perf", "pet3d"]

    raw_data = []

    for csv_path in csv_files:
        # Derive the subject key from the sub-*/ses-* path components.
        sub = next((p for p in csv_path.parts if p.startswith("sub-")), None)
        ses = next((p for p in csv_path.parts if p.startswith("ses-")), None)
        subject_key = f"{sub}_{ses}" if sub and ses else "UNKNOWN"

        # Longest matching clue wins (so 'T1wHierarchical' beats 'T1w').
        matched_prefix = None
        best_len = 0
        for clue, prefix in MODALITY_MAP.items():
            if any(clue in part for part in csv_path.parts) or clue in csv_path.name:
                if len(clue) > best_len:
                    matched_prefix = prefix
                    best_len = len(clue)

        if not matched_prefix:
            if verbose:
                print(f" SKIP: unknown modality for file {csv_path.name}")
            continue

        if verbose:
            print(f"\nProcessing: {matched_prefix.ljust(10)} | {csv_path.name}")

        df = pd.read_csv(csv_path)

        # Drop bookkeeping id columns emitted by the hierarchical pipeline.
        drop_cols = [c for c in df.columns if "hier_id" in c.lower()]
        if drop_cols:
            df = df.drop(columns=drop_cols)
            if verbose:
                print(f" Dropped columns: {drop_cols}")

        # Collapse multi-row csvs to their final row.
        if len(df) > 1:
            if verbose:
                print(f" Collapsing {len(df)} rows to last")
            df = df.iloc[[-1]].copy()

        df.insert(0, "bids_subject", subject_key)

        raw_data.append((matched_prefix, df))

    if not raw_data:
        if verbose:
            print("No valid *_mmwide.csv files were loaded!")
        return pd.DataFrame()

    # Reorder frames to MODALITY_ORDER, then append any unordered leftovers.
    # NOTE(review): the 'break' keeps only the FIRST frame per modality —
    # extra files of an already-seen modality are silently dropped; confirm
    # this is intended (one mmwide csv per modality per session).
    ordered_data = []
    seen_mods = set()

    for mod in MODALITY_ORDER:
        for m, d in raw_data:
            if m == mod:
                ordered_data.append((m, d))
                seen_mods.add(m)
                break

    for m, d in raw_data:
        if m not in seen_mods:
            ordered_data.append((m, d))

    check_modality_order(ordered_data, MODALITY_ORDER)

    # T1Hier is the reference modality: its columns win over duplicates
    # appearing in any other modality's frame.
    t1hier_df = next((df for mod, df in ordered_data if mod == "T1Hier"), None)
    t1hier_raw_cols = set()
    if t1hier_df is not None:
        t1hier_raw_cols = set(t1hier_df.columns) - {"bids_subject"}

    processed_data = []
    for mod, df in ordered_data:
        if mod != "T1Hier" and t1hier_raw_cols:
            overlap = t1hier_raw_cols & set(df.columns)
            if overlap and verbose:
                print(f" Excluding {len(overlap)} overlapping columns from {mod}")
            df = df.drop(columns=overlap, errors='ignore')
        processed_data.append((mod, df))

    # Horizontal merge keyed on 'bids_subject' (first column of each frame).
    wide = bind_mm_rows(processed_data, sep=sep)

    if "bids_subject" in wide.columns:
        wide = wide.drop_duplicates(subset="bids_subject", keep="last")
        wide = wide.rename(columns={"bids_subject": "subject_id"})

    # Final column order: subject_id, then each modality's columns sorted,
    # then any remaining columns sorted.
    ordered_cols = ["subject_id"]
    for mod_prefix in MODALITY_ORDER:
        mod_cols = [c for c in wide.columns if c.startswith(mod_prefix + sep)]
        mod_cols.sort()
        ordered_cols.extend(mod_cols)

    remaining = [c for c in wide.columns if c not in ordered_cols]
    if remaining:
        remaining.sort()
        ordered_cols.extend(remaining)

    wide = wide[ordered_cols]
    return wide
def bind_mm_rows(named_dataframes, sep='_'):
    """
    Horizontally merge per-modality wide tables into one table.

    Parameters
    ----------
    named_dataframes : list of (str, pandas.DataFrame)
        Pairs of (modality name, frame). Each frame's first column is used
        as the row identifier; remaining columns get a ``<modality><sep>``
        prefix.
    sep : str
        Separator between the modality name and the original column name.

    Returns
    -------
    pandas.DataFrame
        Outer-joined table with alphabetically sorted columns and the
        identifier restored as the leading column; empty DataFrame when the
        input list is empty.
    """
    if not named_dataframes:
        return pd.DataFrame()

    def _prepare(mod_name, frame):
        # Work on a copy; normalise blank and "NA" cells to missing values.
        out = frame.copy().replace(["", "NA"], pd.NA)
        # Treat the first column as the row identifier.
        out = out.set_index(out.columns[0])
        # Collapse duplicate identifiers, keeping the last occurrence.
        if len(out) > 1:
            out = out.groupby(level=0).last()
        # Prefix every measurement column with its modality name.
        return out.rename(columns={c: f"{mod_name}{sep}{c}" for c in out.columns})

    prepared = [_prepare(name, frame) for name, frame in named_dataframes]

    merged = pd.concat(prepared, axis=1, join="outer")
    # Keep only the first occurrence of any duplicated column name.
    merged = merged.loc[:, ~merged.columns.duplicated(keep="first")]
    # Present measurement columns alphabetically.
    merged = merged.reindex(sorted(merged.columns), axis=1)

    # The restored index keeps the original id column's name, so this rename
    # only fires when that name is literally "index".
    return merged.reset_index().rename(columns={"index": "subject_id"})
def process_session(session_data, output_root, project_id="ANTsX",
                    denoise_dti=True, dti_moco='SyN', separator='_', verbose=True,
                    build_wide_table=True):
    """
    Runs the full ANTsPyMM pipeline on one session.

    Parameters
    ----------
    session_data : mapping
        One row from :func:`parse_antsxbids_layout` — must provide
        'subjectID', 'date' and 't1_filename'; other modality keys optional.
    output_root : str
        Destination root for antspymm outputs.
    project_id : str
        Project label used in the NRG path layout.
    denoise_dti : bool
        Forwarded to antspymm as ``dti_denoise``.
    dti_moco : str
        Forwarded as ``dti_motion_correct``.
    separator : str
        Filename field separator forwarded to antspymm (``mysep``).
    verbose : bool
        Print progress diagnostics.
    build_wide_table : bool
        After processing, merge the session's *_mmwide.csv outputs into one
        wide table and write it beside the T1wHierarchical results.

    Returns
    -------
    dict
        {'success': bool, 'wide_df': DataFrame or None, 'session_dir': str or None}.
    """
    result = {
        'success': False,
        'wide_df': None,
        'session_dir': None
    }

    # 1. Setup paths
    sub_id = session_data['subjectID']
    date_id = session_data['date']

    # 2. Extract run ID (image_uid) from the T1 filename.
    t1_fn = session_data['t1_filename']
    image_uid = extract_image_id(t1_fn)

    # 3. Setup Staging Area (root of our temp NRG structure); recreated
    # fresh on every call so stale files cannot leak in.
    staging_root = os.path.join(tempfile.gettempdir(), f"antsxmm_staging_{sub_id}_{date_id}")
    if os.path.exists(staging_root):
        shutil.rmtree(staging_root)
    os.makedirs(staging_root, exist_ok=True)

    # 4. Stage Files into the NRG layout antspymm expects.
    # T1w
    t1_path, _, _ = sanitize_and_stage_file(t1_fn, project_id, sub_id, date_id, "T1w", image_uid, separator, staging_root, verbose)

    # FLAIR
    flair_raw = session_data.get('flair_filename', None)
    flair_path, _, _ = sanitize_and_stage_file(flair_raw, project_id, sub_id, date_id, "T2Flair", image_uid, separator, staging_root, verbose)
    # NOTE(review): '_' here is the LAST value unpacked above (the folder id
    # placeholder); relying on it is fragile — consider naming it explicitly.
    flair_info = (flair_path, _)

    # rsfMRI (handle list & variants); keep only files that staged successfully.
    rsf_raw = session_data.get('rsf_filenames', [])
    rsf_infos = []
    rsf_paths = []
    for f in rsf_raw:
        path, mod, folder_id = sanitize_and_stage_file(f, project_id, sub_id, date_id, "rsfMRI", image_uid, separator, staging_root, verbose)
        if path:
            rsf_infos.append((path, mod, folder_id))
            rsf_paths.append(path)

    # DTI (handle list & variants)
    dti_raw = session_data.get('dti_filenames', [])
    dti_infos = []
    dti_paths = []
    for f in dti_raw:
        path, mod, folder_id = sanitize_and_stage_file(f, project_id, sub_id, date_id, "DTI", image_uid, separator, staging_root, verbose)
        if path:
            dti_infos.append((path, mod, folder_id))
            dti_paths.append(path)

    # NM: keep the original per-file ID when available ('000' is the
    # extract_image_id fallback, in which case reuse the T1's uid).
    nm_raw = session_data.get('nm_filenames', [])
    nm_infos = []
    nm_paths = []
    for f in nm_raw:
        rid = extract_image_id(f)
        if rid == "000": rid = image_uid
        path, mod, folder_id = sanitize_and_stage_file(f, project_id, sub_id, date_id, "NM2DMT", rid, separator, staging_root, verbose)
        if path:
            nm_infos.append((path, mod, folder_id))
            nm_paths.append(path)

    # Perf (same '_' placeholder caveat as flair_info above)
    perf_raw = session_data.get('perf_filename', None)
    perf_path, _, _ = sanitize_and_stage_file(perf_raw, project_id, sub_id, date_id, "perf", image_uid, separator, staging_root, verbose)
    perf_info = (perf_path, _)

    # PET
    pet_raw = session_data.get('pet3d_filename', None)
    pet_path, _, _ = sanitize_and_stage_file(pet_raw, project_id, sub_id, date_id, "pet3d", image_uid, separator, staging_root, verbose)
    pet_info = (pet_path, _)

    # The staging area doubles as antspymm's "source image directory".
    mock_source_dir = staging_root

    try:
        # Pre-execution check: show what the output tree should look like.
        if verbose:
            print_expected_tree(output_root, project_id, sub_id, date_id, image_uid,
                                flair_info, rsf_infos, dti_infos, nm_infos, perf_info, pet_info, separator)

        if verbose:
            print(f"\n{'='*80}")
            print(f"Processing: {sub_id} | {date_id}")
            print(f"Image UID: {image_uid}")

        # Build the antspymm study dataframe for this single session.
        study_csv = antspymm.generate_mm_dataframe(
            projectID=project_id,
            subjectID=sub_id,
            date=date_id,
            imageUniqueID=image_uid,
            modality='T1w',
            source_image_directory=mock_source_dir,
            output_image_directory=output_root,
            t1_filename=t1_path,
            flair_filename=flair_path,
            rsf_filenames=rsf_paths,
            dti_filenames=dti_paths,
            nm_filenames=nm_paths,
            perf_filename=perf_path,
            pet3d_filename=pet_path
        )

        # Override IDs for single-file modalities to ensure alignment
        # with the T1's image_uid.
        if 'flairid' in study_csv.columns and flair_path:
            study_csv['flairid'] = image_uid

        # For multi-file modalities (rsfMRI, DTI), we do NOT override IDs.
        # generate_mm_dataframe maps the input list of file paths to columns rsfid1, rsfid2...
        # These columns hold the FULL PATH to the file.
        # antspymm's docsamson uses these columns to check file existence.
        # Since we passed valid staged paths, they will exist and processing will proceed.

        if perf_path and 'perfid' in study_csv.columns:
            study_csv['perfid'] = image_uid
        if pet_path and 'pet3did' in study_csv.columns:
            study_csv['pet3did'] = image_uid

        # Drop all-NaN columns (modalities that were not provided).
        study_csv_clean = study_csv.dropna(axis=1)

        # Best-effort template load: fall back to None (antspymm default)
        # on any failure.
        try:
            template_path = antspymm.get_data("PPMI_template0", target_extension=".nii.gz")
            mask_path = antspymm.get_data("PPMI_template0_brainmask", target_extension=".nii.gz")
            if not template_path or not mask_path:
                template = None
            else:
                template = ants.image_read(template_path)
                template_mask = ants.image_read(mask_path)
                # Skull-strip the template, then crop to a dilated brain mask.
                template = template * template_mask
                template = ants.crop_image(template, ants.iMath(template_mask, "MD", 12))
        # NOTE(review): bare 'except' also swallows KeyboardInterrupt/SystemExit;
        # prefer 'except Exception'.
        except:
            template = None
            if verbose:
                print("Warning: Using default template (None)")

        if verbose:
            print("Running antspymm.mm_csv()...")

        # Main processing call — writes results under output_root.
        antspymm.mm_csv(
            study_csv_clean,
            mysep=separator,
            dti_motion_correct=dti_moco,
            dti_denoise=denoise_dti,
            normalization_template=template,
            normalization_template_output='ppmi',
            normalization_template_transform_type='antsRegistrationSyNQuickRepro[s]',
            normalization_template_spacing=[1,1,1],
            srmodel_T1=None, srmodel_NM=None, srmodel_DTI=None
        )

        result['success'] = True
        result['session_dir'] = os.path.join(output_root, project_id, sub_id, date_id)

        # Optionally merge the per-modality *_mmwide.csv outputs into one
        # wide table and store it under the T1wHierarchical directory.
        if build_wide_table:
            session_output_dir = result['session_dir']
            if os.path.exists(session_output_dir):
                try:
                    wide_df = build_wide_table_from_mmwide(
                        root_dir=session_output_dir,
                        sep=separator,
                        verbose=verbose
                    )
                    result['wide_df'] = wide_df

                    t1_hier_dir = os.path.join(session_output_dir, "T1wHierarchical", image_uid)
                    if os.path.exists(t1_hier_dir):
                        filename = f"{project_id}{separator}{sub_id}{separator}{date_id}{separator}T1wHierarchical{separator}{image_uid}{separator}mmwide_merged.csv"
                        out_path = os.path.join(t1_hier_dir, filename)
                        wide_df.to_csv(out_path, index=False)
                        if verbose:
                            print(f"[SUCCESS] Session merged wide table written to:\n {out_path}")
                    else:
                        if verbose:
                            print(f"[WARNING] T1wHierarchical directory not found: {t1_hier_dir}")

                except Exception as e:
                    # The pipeline itself succeeded; table merging is best-effort.
                    if verbose:
                        print("Warning: Failed to build wide table:", e)
                    result['wide_df'] = None

        return result

    except Exception as e:
        # Any pipeline failure is reported but not re-raised; caller inspects
        # result['success'].
        print("Error processing {} {}: {}".format(sub_id, date_id, str(e)))
        traceback.print_exc()
        return result
    finally:
        # Always clean up the temporary staging tree.
        if os.path.exists(staging_root):
            shutil.rmtree(staging_root)
def run_study(bids_dir, output_dir, project, denoise_dti=True,
              participant_label=None, session_label=None, separator='+'):
    """
    Run the per-session pipeline over every session in a BIDS tree.

    Parameters
    ----------
    bids_dir : str or Path
        Root of the antsxbids-generated BIDS layout.
    output_dir : str
        Destination directory for all pipeline outputs (created if missing).
    project : str
        Project identifier forwarded to :func:`process_session`.
    denoise_dti : bool
        Forwarded to :func:`process_session`.
    participant_label, session_label : str, optional
        Restrict processing to a single subject and/or session.
    separator : str
        Filename field separator forwarded to the pipeline.
    """
    print(f"Parsing BIDS layout from: {bids_dir}")
    layout_df = parse_antsxbids_layout(bids_dir)

    # Optional narrowing to one subject and/or one session.
    if participant_label:
        layout_df = layout_df[layout_df['subjectID'] == participant_label]
        print(f"Filtering for subject: {participant_label}")
    if session_label:
        layout_df = layout_df[layout_df['date'] == session_label]
        print(f"Filtering for session: {session_label}")

    if layout_df.empty:
        print("No valid subjects/sessions found.")
        return

    print(f"Found {len(layout_df)} unique sessions to process.")
    os.makedirs(output_dir, exist_ok=True)

    failures = []
    wide_tables = []

    # Process each session, collecting wide tables and recording failures.
    for _, row in tqdm(layout_df.iterrows(), total=layout_df.shape[0]):
        outcome = process_session(
            row,
            output_root=output_dir,
            project_id=project,
            denoise_dti=denoise_dti,
            dti_moco='SyN',
            separator=separator,
            build_wide_table=True
        )

        if not outcome['success']:
            failures.append(f"{row['subjectID']}_{row['date']}")
            continue

        merged = outcome['wide_df']
        if merged is not None and not merged.empty:
            wide_tables.append(merged)

    if failures:
        print(f"Finished with {len(failures)} errors: {failures}")
    else:
        print("Processing complete successfully.")
# Static package version string (a '.dev1' pre-release of 0.8.1).
__version__ = '0.8.1.dev1'