- class miml.data.miml_dataset.MIMLDataset#
Bases:
object
Class to manage MIML data obtained from datasets
- add_attribute(name: str, position: int | None = None, values: ndarray | None = None, feature: bool = True) None #
Add attribute to the dataset
Parameters#
- name: str
Name of the new attribute
- position: int, default = None
Index for the new attribute
- values: ndarray of shape (n_instances)
Values for the new attribute
- feature: bool
Boolean value to determine if the attribute added is a feature or a label
- add_bag(bag: Bag) None #
Add a bag to the dataset
Parameters#
- bag: Bag
Instance of Bag class to be added
- add_instance(bag, instance: Instance) None #
Add an Instance to a Bag of the dataset
Parameters#
- bag: int/str
Index or key of the bag where the instance will be added
- instance: Instance
Instance of Instance class to be added
- cardinality()#
Computes the Cardinality as the average number of labels per pattern.
Returns#
- cardinality: float
Average number of labels per pattern
- delete_attribute(position: int) None #
Delete attribute of the dataset
Parameters#
- position: int
Index of the attribute to be deleted
- delete_bag(key_bag: str) None #
Delete a bag of the dataset
Parameters#
- key_bag: str
Key of the bag to be deleted
- delete_instance(bag, index_instance: int) None #
Delete an instance of a bag of the dataset
Parameters#
- bag: int/str
Index or key of the bag which contains the instance to be deleted
- index_instance: int
Index of the instance to be deleted
- density()#
Computes the density as the cardinality / numLabels.
Returns#
- density: float
Cardinality divided by number of labels
- describe()#
Print statistics about the dataset
- distinct()#
Computes the number of distinct label combinations used in the dataset relative to all possible combinations
Returns#
- distinct: float
Number of label combinations used in the dataset divided by the number of all possible combinations
- get_attribute(bag, instance, attribute) float #
Get value of an attribute of the bag
Parameters#
- bag: str
Key of the bag which contains the attribute
- instance: int
Index of the instance in the bag
- attribute: int/str
Index/Name of the attribute
Returns#
- value: float
Value of the attribute
- get_attributes() ndarray #
Get attributes values of the dataset
Returns#
- attributes data: ndarray of shape (n_instances, n_attributes)
Values of the attributes of the dataset
- get_attributes_name() list[str] #
Get attributes name
Returns#
- attributes: list[str]
Attributes name of the dataset
- get_bag(bag) Bag #
Get data of a bag of the dataset
Parameters#
- bag: int/str
Index or key of the bag to be obtained
Returns#
- bag: Bag
Instance of Bag class
- get_features() ndarray #
Get features values of the dataset
Returns#
- features: ndarray of shape (n_instances, n_features)
Values of the features of the dataset
- get_features_by_bag() ndarray #
Get features values of the dataset by bag
Returns#
- features: ndarray of shape (n_bags, n_instances, n_features)
Values of the features of the dataset
- get_features_name() list[str] #
Get function for dataset features name
Returns#
- features: list[str]
Features name of the dataset
- get_instance(key_bag, index_instance) Instance #
Get an Instance of the dataset
Parameters#
- key_bag: str
Key of the bag
- index_instance: int
Index of the instance in the bag
Returns#
- instance: Instance
Instance of Instance class
- get_labels()#
Get labels values of the dataset
Returns#
- labels: ndarray of shape (n_instances, n_labels)
Values of the labels of the dataset
- get_labels_by_bag()#
Get labels values of the dataset by bag
Returns#
- labels: ndarray of shape (n_bags, n_labels)
Values of the labels of the dataset
- get_labels_name() list[str] #
Get function for dataset labels name
Returns#
- labels: list[str]
Labels name of the dataset
- get_number_attributes() int #
Get number of attributes of the dataset
Returns#
- number of attributes: int
Number of attributes of the dataset
- get_number_bags() int #
Get number of bags of the dataset
Returns#
- number of bags: int
Number of bags of the dataset
- get_number_features() int #
Get number of features of the dataset
Returns#
- number of features: int
Number of features of the dataset
- get_number_instances() int #
Get number of instances of the dataset
Returns#
- number of instances: int
Number of instances of the dataset
- get_number_labels() int #
Get number of labels of the dataset
Returns#
- number of labels: int
Number of labels of the dataset
- get_statistics()#
Calculate statistics of the dataset
Returns#
- n_instances: int
Number of instances of the dataset
- min_instances: int
Number of instances in the bag with minimum number of instances
- max_instances: int
Number of instances in the bag with maximum number of instances
- distribution: dict
Distribution of number of instances in bags
- set_attribute(bag, index_instance: int, attribute, value: float) None #
Update the value of an attribute
Parameters#
- bag: int/str
Index or key of the bag of the dataset
- index_instance: int
Index of the instance
- attribute: int/str
Attribute of the dataset
- value: float
New value for the update
- set_features_name(features: list[str]) None #
Set function for dataset features name
Parameters#
- features: list[str]
List of the features name of the dataset
- set_labels_name(labels: list[str]) None #
Set function for dataset labels name
Parameters#
- labels: list[str]
List of the labels name of the dataset
- show_dataset(start: int = 0, end: int | None = None, attributes=None, mode: str = 'table', info=False) None #
Function to show information about the dataset
Parameters#
- start: int
Index of bag to start showing
- end: int
Index of bag to end showing
- attributes: List of string
Attributes to show
- mode: str
Mode to show the dataset. Modes available are “table” and “compact” (csv format)
- info: Boolean
Show more info
- split_dataset(train_percentage: float = 0.8, seed=0)#
Split dataset in two parts, one for training and the other for test
Parameters#
- train_percentage: float
Percentage of bags in train dataset
- seed: int
Seed to generate random numbers
Returns#
- dataset_train: MIMLDataset
Train dataset
- dataset_test: MIMLDataset
Test dataset