Coverage for src / autoencodix / ontix.py: 54%

50 statements  

« prev     ^ index     » next       coverage.py v7.14.0, created at 2026-05-21 10:09 +0200

1from typing import Dict, Optional, Type, Union, Tuple, List 

2import torch 

3import numpy as np 

4 

5from autoencodix.base._base_dataset import BaseDataset 

6from autoencodix.base._base_loss import BaseLoss 

7from autoencodix.base._base_pipeline import BasePipeline 

8from autoencodix.base._base_trainer import BaseTrainer 

9from autoencodix.base._base_visualizer import BaseVisualizer 

10from autoencodix.base._base_preprocessor import BasePreprocessor 

11from autoencodix.base._base_autoencoder import BaseAutoencoder 

12from autoencodix.base._base_evaluator import BaseEvaluator 

13from autoencodix.data._datasetcontainer import DatasetContainer 

14from autoencodix.data._datasplitter import DataSplitter 

15from autoencodix.data.datapackage import DataPackage 

16from autoencodix.data._numeric_dataset import NumericDataset 

17from autoencodix.data.general_preprocessor import GeneralPreprocessor 

18from autoencodix.evaluate._general_evaluator import GeneralEvaluator 

19 

20# from autoencodix.modeling._varix_architecture import VarixArchitecture 

21from autoencodix.modeling._ontix_architecture import OntixArchitecture 

22from autoencodix.trainers._ontix_trainer import OntixTrainer 

23from autoencodix.utils._result import Result 

24from autoencodix.configs.default_config import DefaultConfig 

25 

26from autoencodix.configs.ontix_config import OntixConfig 

27from autoencodix.utils._losses import VarixLoss 

28from autoencodix.visualize._general_visualizer import GeneralVisualizer 

29 

30 

31## Copy from Varix with ontology addition 

class Ontix(BasePipeline):
    """Ontix specific version of the BasePipeline class.

    Inherits preprocess, fit, predict, evaluate, and visualize methods from BasePipeline.

    This class extends BasePipeline. See the parent class for a full list
    of attributes and methods.

    Additional Attributes:
        _default_config: Is set to OntixConfig here.
        ontologies: Tuple of ontologies, either taken as passed (tuple input)
            or read from the given files (list input).

    """

    def __init__(
        self,
        ontologies: Union[Tuple, List],  # Addition to Varix, mandatory for Ontix
        sep: Optional[str] = "\t",  # Addition to Varix, optional to read in ontologies
        data: Optional[Union[DataPackage, DatasetContainer]] = None,
        trainer_type: Type[BaseTrainer] = OntixTrainer,
        dataset_type: Type[BaseDataset] = NumericDataset,
        model_type: Type[BaseAutoencoder] = OntixArchitecture,
        loss_type: Type[BaseLoss] = VarixLoss,
        preprocessor_type: Type[BasePreprocessor] = GeneralPreprocessor,
        visualizer: Type[BaseVisualizer] = GeneralVisualizer,
        evaluator: Optional[Type[BaseEvaluator]] = GeneralEvaluator,
        result: Optional[Result] = None,
        datasplitter_type: Type[DataSplitter] = DataSplitter,
        custom_splits: Optional[Dict[str, np.ndarray]] = None,
        config: Optional[DefaultConfig] = None,
    ) -> None:
        """Initialize Ontix pipeline with customizable components.

        Some components are passed as types rather than instances because they require
        data that is only available after preprocessing.

        See parent class for full list of Arguments.

        Args:
            ontologies: Either a tuple that is stored as-is, or a list of file
                paths that are each parsed with ``_read_ont_file``.
            sep: Separator used when reading ontology files. Only required
                when ``ontologies`` is a list.
            config: Pipeline configuration. When None, the OntixConfig stored
                in ``_default_config`` is used. Note that ``latent_dim`` of the
                config object is overwritten in place.

        Raises:
            TypeError: if ontologies are not a Tuple or List, or if the
                resulting config is not an OntixConfig.
            ValueError: if ontologies is a list and sep is None, or if
                ontologies is empty.

        """
        self._default_config = OntixConfig()
        if isinstance(ontologies, tuple):
            self.ontologies = ontologies
        elif isinstance(ontologies, list):
            if sep is None:
                raise ValueError(
                    "If ontologies are provided as a list, the seperator 'sep' cannot be None. "
                )
            self.ontologies = tuple(
                self._read_ont_file(ont_file, sep=sep) for ont_file in ontologies
            )
        else:
            raise TypeError(
                f"Expected ontologies to be of type tuple or list, got {type(ontologies)}."
            )

        # The first ontology determines the latent dimension, so at least one
        # is needed; fail with a clear message instead of a bare IndexError.
        if not self.ontologies:
            raise ValueError(
                "Expected at least one ontology, got an empty tuple or list."
            )

        # Without this fallback the default call (config=None) would fail with
        # AttributeError when latent_dim is assigned below.
        if config is None:
            config = self._default_config

        config.latent_dim = len(
            self.ontologies[0]
        )  # Set latent dim to number of nodes in the ontology

        super().__init__(
            data=data,
            dataset_type=dataset_type,
            trainer_type=trainer_type,
            model_type=model_type,
            loss_type=loss_type,
            preprocessor_type=preprocessor_type,
            visualizer=visualizer,
            evaluator=evaluator,
            result=result,
            datasplitter_type=datasplitter_type,
            config=config,
            custom_split=custom_splits,
            ontologies=self.ontologies,
        )
        if not isinstance(self.config, OntixConfig):
            raise TypeError(
                f"For Ontix Pipeline, we only allow OntixConfig as type for config, got {type(self.config)}"
            )

    def _read_ont_file(self, file_path: str, sep: str = "\t") -> Dict[str, List[str]]:
        """Read an ontology text file with format child - separator - parent into a dictionary.

        Blank lines are skipped. Only the first two fields of each line are
        used; any further fields are ignored.

        Args:
            file_path: Path to file with ontology
            sep: Separator used in file
        Returns:
            ont_dic: Dictionary mapping each parent id to the list of its
                child ids, in file order.
        Raises:
            ValueError: if a non-blank line does not contain at least two
                fields separated by ``sep``.

        """
        ont_dic: Dict[str, List[str]] = {}
        with open(file_path, "r") as ont_file:
            for line_number, line in enumerate(ont_file, start=1):
                stripped = line.strip()
                if not stripped:
                    # Tolerate empty lines, e.g. a trailing newline at end of file.
                    continue

                # Parse both ids from the same stripped line so surrounding
                # whitespace is handled consistently for child and parent.
                fields = stripped.split(sep)
                if len(fields) < 2:
                    raise ValueError(
                        f"Malformed ontology line {line_number} in {file_path}: "
                        f"expected 'child{sep}parent', got {stripped!r}."
                    )
                id_child, id_parent = fields[0], fields[1]
                ont_dic.setdefault(id_parent, []).append(id_child)

        return ont_dic