Coverage for src / autoencodix / utils / _screader.py: 32%
28 statements
« prev ^ index » next coverage.py v7.14.0, created at 2026-05-21 10:09 +0200
« prev ^ index » next coverage.py v7.14.0, created at 2026-05-21 10:09 +0200
1import scanpy as sc # type: ignore
2import mudata as md # type: ignore
3from anndata import AnnData # type: ignore
4from typing import Dict, Any, TYPE_CHECKING
5from autoencodix.configs.default_config import DefaultConfig
if TYPE_CHECKING:
    import mudata as md  # type: ignore

    # The mudata package exposes the class directly as ``mudata.MuData``;
    # there is no nested ``MuData.MuData`` attribute to resolve.
    MuData = md.MuData
else:
    # Runtime placeholder so annotations that mention ``MuData`` still evaluate.
    MuData = Any
class SingleCellDataReader:
    """Reader for multi-modal single-cell data."""

    @staticmethod
    def read_data(
        config: DefaultConfig,
    ) -> Dict[str, MuData]:  # ty: ignore[invalid-type-form]
        """Read the single-cell modalities described by ``config``.

        Every entry of ``config.data_config.data_info`` whose
        ``is_single_cell`` flag is truthy is loaded from its ``file_path``
        with ``scanpy.read_h5ad``; all other entries are skipped.

        Args:
            config: Configuration object providing ``data_config.data_info``
                (modality key -> info object) and ``requires_paired``.

        Returns:
            A dict with the single key ``"multi_sc"``.

            * If ``config.requires_paired`` is truthy, the value is a MuData
              built from the modalities, each restricted to the cells present
              in all of them and ordered by sorted cell name.
            * Otherwise the value is the plain dict mapping modality key to
              the AnnData object as loaded (no alignment is performed).

        Raises:
            ValueError: If ``config.requires_paired`` is truthy but no
                single-cell modality was found in the configuration.
        """
        modalities: Dict[str, AnnData] = {
            mod_key: sc.read_h5ad(mod_info.file_path)
            for mod_key, mod_info in config.data_config.data_info.items()
            if mod_info.is_single_cell
        }

        if not config.requires_paired:
            return {"multi_sc": modalities}

        # set.intersection() needs at least one operand; fail with a clear
        # message instead of the opaque TypeError it would raise otherwise.
        if not modalities:
            raise ValueError(
                "Paired mode requires at least one single-cell modality, "
                "but none was found in config.data_config.data_info."
            )

        # Sorting gives a deterministic cell order shared by all modalities.
        common_cells_sorted = sorted(
            set.intersection(
                *(set(adata.obs_names) for adata in modalities.values())
            )
        )

        # Subset EACH modality individually with the sorted common cells so
        # that every modality ends up aligned to the same order.
        aligned_modalities = {
            mod_key: adata[common_cells_sorted].copy()
            for mod_key, adata in modalities.items()
        }
        mdata = md.MuData(aligned_modalities)

        print(f"Number of common cells: {len(common_cells_sorted)}")

        # Rename the obs *columns* (not obs_names): keep only the part after
        # the last ":" so that "<prefix>:<column>" becomes "<column>".
        # Names without ":" are left unchanged by the split.
        mdata.obs.columns = [name.split(":")[-1] for name in mdata.obs.columns]

        # Stripping prefixes can produce repeated column names; keep the
        # first occurrence of each.
        mdata.obs = mdata.obs.loc[:, ~mdata.obs.columns.duplicated(keep="first")]

        return {"multi_sc": mdata}