class miml.data.miml_dataset.MIMLDataset#

Bases: object

Class to manage MIML data obtained from datasets

add_attribute(name: str, position: int | None = None, values: ndarray | None = None, feature: bool = True) None#

Add attribute to the dataset

Parameters#

name: str

Name of the new attribute

position: int, default=None

Index for the new attribute

values: ndarray of shape (n_instances,), default=None

Values for the new attribute

feature: bool, default=True

Boolean value to determine if the attribute added is a feature or a label

add_bag(bag: Bag) None#

Add a bag to the dataset

Parameters#

bag: Bag

Instance of Bag class to be added

add_instance(bag, instance: Instance) None#

Add an Instance to a Bag of the dataset

Parameters#

bag: int/str

Index or key of the bag where the instance will be added

instance: Instance

Instance of Instance class to be added

cardinality()#

Computes the Cardinality as the average number of labels per pattern.

Returns#

cardinality: float

Average number of labels per pattern

delete_attribute(position: int) None#

Delete attribute of the dataset

Parameters#

position: int

Index of the attribute to be deleted

delete_bag(key_bag: str) None#

Delete a bag of the dataset

Parameters#

key_bag: str

Key of the bag to be deleted

delete_instance(bag, index_instance: int) None#

Delete an instance of a bag of the dataset

Parameters#

bag: int/str

Index or key of the bag which contains the instance to be deleted

index_instance: int

Index of the instance to be deleted

density()#

Computes the density as the cardinality divided by the number of labels.

Returns#

density: float

Cardinality divided by number of labels

describe()#

Print statistics about the dataset

distinct()#

Computes the number of distinct label combinations used in the dataset with respect to all the possible ones

Returns#

distinct: float

Number of label combinations used in the dataset divided by the number of all possible combinations

get_attribute(bag, instance, attribute) float#

Get the value of an attribute of an instance in a bag

Parameters#

bag: str

Key of the bag which contains the attribute

instance: int

Index of the instance in the bag

attribute: int/str

Index/Name of the attribute

Returns#

value: float

Value of the attribute

get_attributes() ndarray#

Get attributes values of the dataset

Returns#

attributes data: ndarray of shape (n_instances, n_attributes)

Values of the attributes of the dataset

get_attributes_name() list[str]#

Get attributes name

Returns#

attributes: list[str]

Attributes name of the dataset

get_bag(bag) Bag#

Get data of a bag of the dataset

Parameters#

bag: int/str

Index or key of the bag to be obtained

Returns#

bag: Bag

Instance of Bag class

get_features() ndarray#

Get features values of the dataset

Returns#

features: ndarray of shape (n_instances, n_features)

Values of the features of the dataset

get_features_by_bag() ndarray#

Get features values of the dataset by bag

Returns#

features: ndarray of shape (n_bags, n_instances, n_features)

Values of the features of the dataset

get_features_name() list[str]#

Get function for dataset features name

Returns#

features: list[str]

Features name of the dataset

get_instance(key_bag, index_instance) Instance#

Get an Instance of the dataset

Parameters#

key_bag: str

Key of the bag

index_instance: int

Index of the instance in the bag

Returns#

instance: Instance

Instance of Instance class

get_labels()#

Get labels values of the dataset

Returns#

labels: ndarray of shape (n_instances, n_labels)

Values of the labels of the dataset

get_labels_by_bag()#

Get labels values of the dataset by bag

Returns#

labels: ndarray of shape (n_bags, n_labels)

Values of the labels of the dataset

get_labels_name() list[str]#

Get function for dataset labels name

Returns#

labels: list[str]

Labels name of the dataset

get_name() str#

Get function for dataset name

Returns#

name: str

Name of the dataset

get_number_attributes() int#

Get number of attributes of the dataset

Returns#

numbers of attributes: int

Number of attributes of the dataset

get_number_bags() int#

Get numbers of bags of the dataset

Returns#

numbers of bags: int

Numbers of bags of the dataset

get_number_features() int#

Get number of features of the dataset

Returns#

numbers of features: int

Number of features of the dataset

get_number_instances() int#

Get numbers of instances of the dataset

Returns#

numbers of instances: int

Numbers of instances of the dataset

get_number_labels() int#

Get numbers of labels of the dataset

Returns#

numbers of labels: int

Numbers of labels of the dataset

get_statistics()#

Calculate statistics of the dataset

Returns#

n_instances: int

Numbers of instances of the dataset

min_instances: int

Number of instances in the bag with minimum number of instances

max_instances: int

Number of instances in the bag with maximum number of instances

distribution: dict

Distribution of number of instances in bags

set_attribute(bag, index_instance: int, attribute, value: float) None#

Update the value of an attribute of an instance in a bag

Parameters#

bag: int/str

Index or key of the bag of the dataset

index_instance: int

Index of the instance

attribute: int/str

Attribute of the dataset

value: float

New value for the update

set_features_name(features: list[str]) None#

Set function for dataset features name

Parameters#

features: list[str]

List of the features name of the dataset

set_labels_name(labels: list[str]) None#

Set function for dataset labels name

Parameters#

labels: list[str]

List of the labels name of the dataset

set_name(name) None#

Set function for dataset name

Parameters#

name: str

Name of the dataset

show_dataset(start: int = 0, end: int | None = None, attributes=None, mode: str = 'table', info=False) None#

Function to show information about the dataset

Parameters#

start: int, default=0

Index of bag to start showing

end: int, default=None

Index of bag to end showing

attributes: list of str, default=None

Attributes to show

mode: str, default='table'

Mode to show the dataset. Modes available are “table” and “compact” (csv format)

info: bool, default=False

Show more info

split_dataset(train_percentage: float = 0.8, seed=0)#

Split dataset in two parts, one for training and the other for test

Parameters#

train_percentage: float, default=0.8

Percentage of bags in train dataset

seed: int

Seed to generate random numbers

Returns#

dataset_train: MIMLDataset

Train dataset

dataset_test: MIMLDataset

Test dataset