Module declare4py.declare4py

Expand source code
import re
import sys
from itertools import product

import pandas as pd
import pm4py
from mlxtend.frequent_patterns import apriori, fpgrowth
from mlxtend.preprocessing import TransactionEncoder

from .api_functions import *
from .parsers import *


class Declare4Py:
    """
    Wrapper that collects the input log and model, the supported templates, the output for the discovery, conformance
    checking and query checking tasks. In addition, it contains the computed binary encoding and frequent item sets
    for the input log.

    Attributes
    ----------
    log : EventLog
        the input event log parsed from a XES file
    model : DeclModel
        the input DECLARE model parsed from a decl file
    log_length : int
        the trace number of the input log
    supported_templates : tuple[str]
        tuple containing all the DECLARE templates supported by the Declare4Py library
    binary_encoded_log : DataFrame
        the binary encoded version of the input log
    frequent_item_sets : DataFrame
        list of the most frequent item sets found along the log traces, together with their support and length
    conformance_checking_results : dict[tuple[int, str], dict[str, CheckerResult]]
        output dictionary of the conformance_checking() function. Each entry contains:
        key = tuple[trace_pos_inside_log, trace_name]
        val = dict[ constraint_string : CheckerResult ]
    query_checking_results : dict[str, dict[str, str]]
        output dictionary of the query_checking() function. Each entry contains:
        key = constraint_string
        val = dict[ constraint_elem_key : constraint_elem_val ]
    discovery_results : dict[str, dict[tuple[int, str], CheckerResult]]
        output dictionary of the discovery() function. Each entry contains:
        key = constraint_string
        val = dict[ tuple[trace_pos_inside_log, trace_name] : CheckerResult ]
    """

    def __init__(self):
        self.log = None
        self.model = None
        self.log_length = None
        # Collect the string representation of every template supported by the library.
        self.supported_templates = tuple(map(lambda c: c.templ_str, Template))
        self.binary_encoded_log = None
        self.frequent_item_sets = None
        self.conformance_checking_results = None
        self.query_checking_results = None
        self.discovery_results = None

    # INTERNAL HELPERS
    def _check_log_loaded(self) -> None:
        """Raise a RuntimeError if no event log has been loaded yet."""
        if self.log is None:
            raise RuntimeError("You must load a log before.")

    def _log_projection(self, event_attribute: str) -> list[list[str]]:
        """
        Return, for each trace, the time-ordered list of the values of 'event_attribute' for the trace events.

        Parameters
        ----------
        event_attribute : str
            the XES event attribute to project (e.g. 'concept:name' or 'org:group').

        Returns
        -------
        projection
            nested lists, the outer one addresses traces while the inner one contains the attribute values.
        """
        self._check_log_loaded()
        return [[event[event_attribute] for event in trace] for trace in self.log]

    # LOG MANAGEMENT UTILITIES
    def parse_xes_log(self, log_path: str) -> None:
        """
        Set the 'log' EventLog object and the 'log_length' integer by reading and parsing the log corresponding to
        given log file path.

        Parameters
        ----------
        log_path : str
            File path where the log is stored.
        """
        self.log = pm4py.read_xes(log_path)
        self.log_length = len(self.log)

    def activities_log_projection(self) -> list[list[str]]:
        """
        Return for each trace a time-ordered list of the activity names of the events.

        Returns
        -------
        projection
            nested lists, the outer one addresses traces while the inner one contains event activity names.
        """
        return self._log_projection("concept:name")

    def resources_log_projection(self) -> list[list[str]]:
        """
        Return for each trace a time-ordered list of the resources of the events.

        Returns
        -------
        projection
            nested lists, the outer one addresses traces while the inner one contains event resources.
        """
        return self._log_projection("org:group")

    def log_encoding(self, dimension: str = 'act') -> pd.DataFrame:
        """
        Return the log binary encoding, i.e. the one-hot encoding stating whether an attribute is contained
        or not inside each trace of the log.

        Parameters
        ----------
        dimension : str, optional
            choose 'act' to perform the encoding over activity names, 'payload' over resources (default 'act').

        Returns
        -------
        binary_encoded_log
            the one-hot encoding of the input log, made over activity names or resources depending on 'dimension' value.

        Raises
        ------
        RuntimeError
            if no log is loaded or 'dimension' is not one of 'act'/'payload'.
        """
        self._check_log_loaded()
        if dimension == 'act':
            dataset = self.activities_log_projection()
        elif dimension == 'payload':
            dataset = self.resources_log_projection()
        else:
            raise RuntimeError(f"{dimension} dimension not supported. Choose between 'act' and 'payload'")
        te = TransactionEncoder()
        te_ary = te.fit(dataset).transform(dataset)
        self.binary_encoded_log = pd.DataFrame(te_ary, columns=te.columns_)
        return self.binary_encoded_log

    def compute_frequent_itemsets(self, min_support: float, dimension: str = 'act', algorithm: str = 'fpgrowth',
                                  len_itemset: int = None) -> None:
        """
        Compute the most frequent item sets with a support greater or equal than 'min_support' with the given algorithm
        and over the given dimension. The result is stored in 'frequent_item_sets'.

        Parameters
        ----------
        min_support: float
            the minimum support of the returned item sets, in range [0, 1].
        dimension : str, optional
            choose 'act' to perform the encoding over activity names, 'payload' over resources (default 'act').
        algorithm : str, optional
            the algorithm for extracting frequent itemsets, choose between 'fpgrowth' (default) and 'apriori'.
        len_itemset : int, optional
            the maximum length of the extracted itemsets; None means no length filter.

        Raises
        ------
        RuntimeError
            if no log is loaded, 'min_support' is out of range or 'algorithm' is unknown.
        """
        self._check_log_loaded()
        if not 0 <= min_support <= 1:
            raise RuntimeError("Min. support must be in range [0, 1].")

        # Refresh the binary encoding so the itemset miner works on the requested dimension.
        self.log_encoding(dimension)
        if algorithm == 'fpgrowth':
            frequent_itemsets = fpgrowth(self.binary_encoded_log, min_support=min_support, use_colnames=True)
        elif algorithm == 'apriori':
            frequent_itemsets = apriori(self.binary_encoded_log, min_support=min_support, use_colnames=True)
        else:
            raise RuntimeError(f"{algorithm} algorithm not supported. Choose between fpgrowth and apriori")
        frequent_itemsets['length'] = frequent_itemsets['itemsets'].apply(len)
        if len_itemset is None:
            self.frequent_item_sets = frequent_itemsets
        else:
            self.frequent_item_sets = frequent_itemsets[frequent_itemsets['length'] <= len_itemset]

    def get_trace_keys(self) -> list[tuple[int, str]]:
        """
        Return the name of each trace, along with the position in the log.

        Returns
        -------
        trace_ids
            list containing the position in the log and the name of the trace.
        """
        self._check_log_loaded()
        return [(trace_id, trace.attributes["concept:name"]) for trace_id, trace in enumerate(self.log)]

    def get_log_length(self) -> int:
        """
        Return the number of traces contained in the log.

        Returns
        -------
        log_length
            the length of the log.
        """
        self._check_log_loaded()
        return self.log_length

    def get_log(self) -> pm4py.objects.log.obj.EventLog:
        """
        Return the log previously fed in input.

        Returns
        -------
        log
            the input log.
        """
        self._check_log_loaded()
        return self.log

    def get_log_alphabet_payload(self) -> set[str]:
        """
        Return the set of resources that are in the log.

        Returns
        -------
        resources
            resource set.
        """
        self._check_log_loaded()
        return {event["org:group"] for trace in self.log for event in trace}

    def get_log_alphabet_activities(self) -> list[str]:
        """
        Return the activities that are in the log.

        Returns
        -------
        activities
            list of the distinct activity names (order is not guaranteed).
        """
        self._check_log_loaded()
        return list({event["concept:name"] for trace in self.log for event in trace})

    def get_frequent_item_sets(self) -> pd.DataFrame:
        """
        Return the set of extracted frequent item sets.

        Returns
        -------
        frequent_item_sets
            the set of extracted frequent item sets.
        """
        self._check_log_loaded()
        if self.frequent_item_sets is None:
            raise RuntimeError("You must run the item set extraction algorithm before.")
        return self.frequent_item_sets

    def get_binary_encoded_log(self) -> pd.DataFrame:
        """
        Return the one-hot encoding of the log.

        Returns
        -------
        binary_encoded_log
            the one-hot encoded log.
        """
        self._check_log_loaded()
        # Bug fix: guard on the encoding itself, not on the frequent item sets
        # (the encoding is produced by log_encoding(), which can run on its own).
        if self.binary_encoded_log is None:
            raise RuntimeError("You must run the log encoding before.")
        return self.binary_encoded_log

    # DECLARE UTILITIES
    def parse_decl_model(self, model_path: str) -> None:
        """
        Parse the input DECLARE model.

        Parameters
        ----------
        model_path : str
            File path where the DECLARE model is stored.
        """
        self.model = parse_decl_from_file(model_path)

    def get_supported_templates(self) -> tuple[str, ...]:
        """
        Return the DECLARE templates supported by Declare4Py.

        Returns
        -------
        supported_templates
            tuple of names of the supported DECLARE templates.
        """
        return self.supported_templates

    def get_model_activities(self) -> list[str]:
        """
        Return the activities contained in the DECLARE model.

        Returns
        -------
        activities
            list of activity names contained in the DECLARE model.
        """
        if self.model is None:
            raise RuntimeError("You must load a DECLARE model before.")
        return self.model.activities

    def get_model_constraints(self) -> list[str]:
        """
        Return the constraints contained in the DECLARE model.

        Returns
        -------
        constraints
            list of constraints contained in the DECLARE model.
        """
        if self.model is None:
            raise RuntimeError("You must load a DECLARE model before.")
        return self.model.get_decl_model_constraints()

    # PROCESS MINING TASKS
    def conformance_checking(self, consider_vacuity: bool) -> dict[tuple[int, str], dict[str, CheckerResult]]:
        """
        Performs conformance checking for the provided event log and DECLARE model.

        Parameters
        ----------
        consider_vacuity : bool
            True means that vacuously satisfied traces are considered as satisfied, violated otherwise.

        Returns
        -------
        conformance_checking_results
            dictionary where the key is a tuple containing trace position inside the log and the trace name, the value
            is a dictionary with keys the names of the constraints and values a CheckerResult object containing
            the number of pendings, activations, violations, fulfilments and the truth value of the trace for that
            constraint.
        """
        print("Computing conformance checking ...")
        if self.log is None:
            raise RuntimeError("You must load the log before checking the model.")
        if self.model is None:
            raise RuntimeError("You must load the DECLARE model before checking the model.")

        self.conformance_checking_results = {}
        for trace_id, trace in enumerate(self.log):
            trc_res = check_trace_conformance(trace, self.model, consider_vacuity)
            self.conformance_checking_results[(trace_id, trace.attributes["concept:name"])] = trc_res
        return self.conformance_checking_results

    def discovery(self, consider_vacuity: bool, max_declare_cardinality: int = 3, output_path: str = None) \
            -> dict[str, dict[tuple[int, str], CheckerResult]]:
        """
        Performs discovery of the supported DECLARE templates for the provided log by using the computed frequent item
        sets.

        Parameters
        ----------
        consider_vacuity : bool
            True means that vacuously satisfied traces are considered as satisfied, violated otherwise.

        max_declare_cardinality : int, optional
            the maximum cardinality that the algorithm checks for DECLARE templates supporting it (default 3).

        output_path : str, optional
            if specified, save the discovered constraints in a DECLARE model to the provided path.

        Returns
        -------
        discovery_results
            dictionary containing the results indexed by discovered constraints. The value is a dictionary with keys
            the tuples containing id and name of traces that satisfy the constraint. The values of this inner dictionary
            is a CheckerResult object containing the number of pendings, activations, violations, fulfilments.
        """
        print("Computing discovery ...")
        self._check_log_loaded()
        if self.frequent_item_sets is None:
            raise RuntimeError("You must discover frequent itemsets before.")
        if max_declare_cardinality <= 0:
            raise RuntimeError("Cardinality must be greater than 0.")

        self.discovery_results = {}
        for item_set in self.frequent_item_sets['itemsets']:
            length = len(item_set)

            if length == 1:
                # Singleton item sets feed the unary templates.
                for templ in Template.get_unary_templates():
                    constraint = {"template": templ, "attributes": ', '.join(item_set), "condition": ("", "")}
                    if not templ.supports_cardinality:
                        self.discovery_results |= discover_constraint(self.log, constraint, consider_vacuity)
                    else:
                        # Try every cardinality from 1 up to the requested maximum.
                        for card in range(1, max_declare_cardinality + 1):
                            constraint['n'] = card
                            self.discovery_results |= discover_constraint(self.log, constraint, consider_vacuity)

            elif length == 2:
                # Pairs feed the binary templates, in both activation/target orders.
                for templ in Template.get_binary_templates():
                    constraint = {"template": templ, "attributes": ', '.join(item_set), "condition": ("", "", "")}
                    self.discovery_results |= discover_constraint(self.log, constraint, consider_vacuity)

                    constraint['attributes'] = ', '.join(reversed(list(item_set)))
                    self.discovery_results |= discover_constraint(self.log, constraint, consider_vacuity)

        if output_path is not None:
            with open(output_path, 'w') as f:
                f.write("activity " + "\nactivity ".join(self.get_log_alphabet_activities()) + "\n")
                f.write('\n'.join(self.discovery_results.keys()))

        return self.discovery_results

    def filter_discovery(self, min_support: float = 0, output_path: str = None) -> dict[str, float]:
        """
        Filters discovery results by means of minimum support.

        Parameters
        ----------
        min_support : float, optional
            the minimum support that a discovered constraint needs to have to be included in the filtered result.

        output_path : str, optional
            if specified, save the filtered constraints in a DECLARE model to the provided path.

        Returns
        -------
        result
            dictionary indexed by the discovered constraints that reach the minimum support; the value is the support
            of the constraint, i.e. the fraction of traces in the log that satisfy it.
        """
        self._check_log_loaded()
        if self.discovery_results is None:
            raise RuntimeError("You must run a Discovery task before.")
        if not 0 <= min_support <= 1:
            raise RuntimeError("Min. support must be in range [0, 1].")

        result = {}
        for constraint_str, trace_results in self.discovery_results.items():
            support = len(trace_results) / len(self.log)
            if support >= min_support:
                result[constraint_str] = support

        if output_path is not None:
            with open(output_path, 'w') as f:
                f.write("activity " + "\nactivity ".join(self.get_log_alphabet_activities()) + "\n")
                f.write('\n'.join(result.keys()))

        return result

    def query_checking(self, consider_vacuity: bool,
                       template_str: str = None, max_declare_cardinality: int = 1,
                       activation: str = None, target: str = None,
                       act_cond: str = None, trg_cond: str = None, time_cond: str = None,
                       min_support: float = 1.0, return_first: bool = False) -> dict[str, dict[str, str]]:
        """
        Performs query checking for a (list of) template, activation activity and target activity. Optional
        activation, target and time conditions can be specified.

        Parameters
        ----------
        consider_vacuity : bool
            True means that vacuously satisfied traces are considered as satisfied, violated otherwise.

        template_str : str, optional
            if specified, the query checking is restricted on this DECLARE template. If not, the query checking is
            performed over the whole set of supported templates.

        max_declare_cardinality : int, optional
            the maximum cardinality that the algorithm checks for DECLARE templates supporting it (default 1).

        activation : str, optional
            if specified, the query checking is restricted on this activation activity. If not, the query checking
            considers in turn each activity of the log as activation.

        target : str, optional
            if specified, the query checking is restricted on this target activity. If not, the query checking
            considers in turn each activity of the log as target.

        act_cond : str, optional
            optional activation condition to evaluate. It has to be written by following the DECLARE standard format.

        trg_cond : str, optional
            optional target condition to evaluate. It has to be written by following the DECLARE standard format.

        time_cond : str, optional
            optional time condition to evaluate. It has to be written by following the DECLARE standard format.

        min_support : float, optional
            the minimum support that a constraint needs to have to be included in the result (default 1).

        return_first : bool, optional
            if True, the algorithm returns only the first queried constraint that is above the minimum support. If
            False, the algorithm returns all the constraints above the min. support (default False).

        Returns
        -------
        query_checking_results
            dictionary with keys the DECLARE constraints satisfying the assignments. The values are a structured
            representations of these constraints.
        """
        print("Computing query checking ...")

        is_template_given = bool(template_str)
        is_activation_given = bool(activation)
        is_target_given = bool(target)
        # Normalise missing conditions to empty strings, the format expected downstream.
        act_cond = act_cond or ""
        trg_cond = trg_cond or ""
        time_cond = time_cond or ""

        if not is_template_given and not is_activation_given and not is_target_given:
            raise RuntimeError("You must set at least one parameter among (template, activation, target).")
        if is_template_given:
            template = Template.get_template_from_string(template_str)
            if template is None:
                raise RuntimeError("You must insert a supported DECLARE template.")
            if not template.is_binary and is_target_given:
                raise RuntimeError("You cannot specify a target activity for unary templates.")
        if not 0 <= min_support <= 1:
            raise RuntimeError("Min. support must be in range [0, 1].")
        if max_declare_cardinality <= 0:
            raise RuntimeError("Cardinality must be greater than 0.")
        self._check_log_loaded()

        # Build the list of template names to try; unary cardinality templates are
        # expanded as "<name>1", "<name>2", ... up to max_declare_cardinality.
        templates_to_check = list()
        if is_template_given:
            templates_to_check.append(template_str)
        else:
            templates_to_check += list(map(lambda t: t.templ_str, Template.get_binary_templates()))
            if not is_target_given:
                for template in Template.get_unary_templates():
                    if template.supports_cardinality:
                        for card in range(1, max_declare_cardinality + 1):
                            templates_to_check.append(template.templ_str + str(card))
                    else:
                        templates_to_check.append(template.templ_str)

        activations_to_check = self.get_log_alphabet_activities() if activation is None else [activation]
        targets_to_check = self.get_log_alphabet_activities() if target is None else [target]
        # Binary templates never pair an activity with itself.
        activity_combos = tuple(filter(lambda c: c[0] != c[1], product(activations_to_check, targets_to_check)))

        self.query_checking_results = {}

        for template_str in templates_to_check:
            # Split a trailing cardinality digit off the template name, e.g. "Existence2" -> ("Existence", "2").
            template_str, cardinality = re.search(r'(^.+?)(\d*$)', template_str).groups()
            template = Template.get_template_from_string(template_str)

            constraint = {"template": template}
            if cardinality:
                constraint['n'] = int(cardinality)

            if template.is_binary:
                constraint['condition'] = (act_cond, trg_cond, time_cond)
                for couple in activity_combos:
                    constraint['attributes'] = ', '.join(couple)

                    constraint_str = query_constraint(self.log, constraint, consider_vacuity, min_support)
                    if constraint_str:
                        res_value = {
                            "template": template_str, "activation": couple[0], "target": couple[1],
                            "act_cond": act_cond, "trg_cond": trg_cond, "time_cond": time_cond
                        }
                        self.query_checking_results[constraint_str] = res_value
                        if return_first:
                            return self.query_checking_results

            else:   # unary template
                constraint['condition'] = (act_cond, time_cond)
                for activity in activations_to_check:
                    constraint['attributes'] = activity

                    constraint_str = query_constraint(self.log, constraint, consider_vacuity, min_support)
                    if constraint_str:
                        res_value = {
                            "template": template_str, "activation": activity,
                            "act_cond": act_cond, "time_cond": time_cond
                        }
                        self.query_checking_results[constraint_str] = res_value
                        if return_first:
                            return self.query_checking_results

        return self.query_checking_results

    def filter_query_checking(self, queries) -> list[list[str]]:
        """
        The function outputs, for each constraint of the query checking result, only the elements of the constraint
        specified in the 'queries' list.

        Parameters
        ----------
        queries : list[str]
            elements of the constraint that the user want to retain from query checking result. Choose one (or more)
            elements among: 'template', 'activation', 'target'.

        Returns
        -------
        assignments
            list containing an entry for each constraint of query checking result. Each entry of the list is a list
            itself, containing the queried constraint elements.

        Raises
        ------
        RuntimeError
            if query checking has not been run, the queries list has an invalid size, or a query is not a valid
            constraint element.
        """
        if self.query_checking_results is None:
            raise RuntimeError("You must run a query checking task before.")
        if len(queries) == 0 or len(queries) > 3:
            raise RuntimeError("The list of queries has to contain at least one query and three queries as maximum")
        assignments = []
        for constraint in self.query_checking_results.keys():
            tmp_answer = []
            for query in queries:
                try:
                    tmp_answer.append(self.query_checking_results[constraint][query])
                except KeyError:
                    # Consistency fix: raise like the other validations instead of printing and calling sys.exit(1);
                    # library code should not terminate the interpreter.
                    raise RuntimeError(
                        f"{query} is not a valid query. Valid queries are template, activation, target."
                    ) from None
            assignments.append(tmp_answer)
        return assignments

    # FUNCTIONS FOR PRINTING RESULTS ##############
    def print_conformance_results(self):
        """Pretty-print the conformance checking results, one trace per paragraph."""
        if self.conformance_checking_results is None:
            raise RuntimeError("You must run conformance checking before!")

        for key, value in self.conformance_checking_results.items():
            print('Trace ID: ' + str(key[0]) + ' - "' + key[1] + '"')
            for item in value.items():
                print('\t' + item[1].state + '\ton ' + item[0])
            print()

Classes

class Declare4Py

Wrapper that collects the input log and model, the supported templates, the output for the discovery, conformance checking and query checking tasks. In addition, it contains the computed binary encoding and frequent item sets for the input log.

Attributes

log : EventLog
the input event log parsed from a XES file
model : DeclModel
the input DECLARE model parsed from a decl file
log_length : int
the trace number of the input log
supported_templates : tuple[str]
tuple containing all the DECLARE templates supported by the Declare4Py library
binary_encoded_log : DataFrame
the binary encoded version of the input log
frequent_item_sets : DataFrame
list of the most frequent item sets found along the log traces, together with their support and length
conformance_checking_results : dict[tuple[int, str]: dict[str: CheckerResult]]
output dictionary of the conformance_checking() function. Each entry contains: key = tuple[trace_pos_inside_log, trace_name] val = dict[ constraint_string : CheckerResult ]
query_checking_results : dict[str: dict[str: str]]
output dictionary of the query_checking() function. Each entry contains: key = constraint_string val = dict[ constraint_elem_key : constraint_elem_val ]
discovery_results : dict[str: dict[tuple[int, str]: CheckerResult]]
output dictionary of the discovery() function. Each entry contains: key = constraint_string val = dict[ tuple[trace_pos_inside_log, trace_name] : CheckerResult ]
Expand source code
class Declare4Py:
    """
    Wrapper that collects the input log and model, the supported templates, the output for the discovery, conformance
    checking and query checking tasks. In addition, it contains the computed binary encoding and frequent item sets
    for the input log.

    Attributes
    ----------
    log : EventLog
        the input event log parsed from a XES file
    model : DeclModel
        the input DECLARE model parsed from a decl file
    log_length : int
        the trace number of the input log
    supported_templates : tuple[str]
        tuple containing all the DECLARE templates supported by the Declare4Py library
    binary_encoded_log : DataFrame
        the binary encoded version of the input log
    frequent_item_sets : DataFrame
        list of the most frequent item sets found along the log traces, together with their support and length
    conformance_checking_results : dict[tuple[int, str], dict[str, CheckerResult]]
        output dictionary of the conformance_checking() function. Each entry contains:
        key = tuple[trace_pos_inside_log, trace_name]
        val = dict[ constraint_string : CheckerResult ]
    query_checking_results : dict[str, dict[str, str]]
        output dictionary of the query_checking() function. Each entry contains:
        key = constraint_string
        val = dict[ constraint_elem_key : constraint_elem_val ]
    discovery_results : dict[str, dict[tuple[int, str], CheckerResult]]
        output dictionary of the discovery() function. Each entry contains:
        key = constraint_string
        val = dict[ tuple[trace_pos_inside_log, trace_name] : CheckerResult ]
    """

    def __init__(self):
        self.log = None
        self.model = None
        self.log_length = None
        self.supported_templates = tuple(map(lambda c: c.templ_str, Template))
        self.binary_encoded_log = None
        self.frequent_item_sets = None
        self.conformance_checking_results = None
        self.query_checking_results = None
        self.discovery_results = None

    # LOG MANAGEMENT UTILITIES
    def parse_xes_log(self, log_path: str) -> None:
        """
        Set the 'log' EventLog object and the 'log_length' integer by reading and parsing the log corresponding to
        given log file path.

        Parameters
        ----------
        log_path : str
            File path where the log is stored.
        """
        self.log = pm4py.read_xes(log_path)
        self.log_length = len(self.log)

    def activities_log_projection(self) -> list[list[str]]:
        """
        Return for each trace a time-ordered list of the activity names of the events.

        Returns
        -------
        projection
            nested lists, the outer one addresses traces while the inner one contains event activity names.
        """
        projection = []
        if self.log is None:
            raise RuntimeError("You must load a log before.")
        for trace in self.log:
            tmp_trace = []
            for event in trace:
                tmp_trace.append(event["concept:name"])
            projection.append(tmp_trace)
        return projection

    def resources_log_projection(self) -> list[list[str]]:
        """
        Return for each trace a time-ordered list of the resources of the events.

        Returns
        -------
        projection
            nested lists, the outer one addresses traces while the inner one contains event resources.
        """
        projection = []
        if self.log is None:
            raise RuntimeError("You must load a log before.")
        for trace in self.log:
            tmp_trace = []
            for event in trace:
                tmp_trace.append(event["org:group"])
            projection.append(tmp_trace)
        return projection

    def log_encoding(self, dimension: str = 'act') -> pd.DataFrame:
        """
        Return the log binary encoding, i.e. the one-hot encoding stating whether an attribute is contained
        or not inside each trace of the log.

        Parameters
        ----------
        dimension : str, optional
            choose 'act' to perform the encoding over activity names, 'payload' over resources (default 'act').

        Returns
        -------
        binary_encoded_log
            the one-hot encoding of the input log, made over activity names or resources depending on 'dimension' value.
        """
        if self.log is None:
            raise RuntimeError("You must load a log before.")
        te = TransactionEncoder()
        if dimension == 'act':
            dataset = self.activities_log_projection()
        elif dimension == 'payload':
            dataset = self.resources_log_projection()
        else:
            raise RuntimeError(f"{dimension} dimension not supported. Choose between 'act' and 'payload'")
        te_ary = te.fit(dataset).transform(dataset)
        self.binary_encoded_log = pd.DataFrame(te_ary, columns=te.columns_)
        return self.binary_encoded_log

    def compute_frequent_itemsets(self, min_support: float, dimension: str = 'act', algorithm: str = 'fpgrowth',
                                  len_itemset: int = None) -> None:
        """
        Compute the most frequent item sets with a support greater or equal than 'min_support' with the given algorithm
        and over the given dimension.

        Parameters
        ----------
        min_support : float
            the minimum support of the returned item sets.
        dimension : str, optional
            choose 'act' to perform the encoding over activity names, 'payload' over resources (default 'act').
        algorithm : str, optional
            the algorithm for extracting frequent itemsets, choose between 'fpgrowth' (default) and 'apriori'.
        len_itemset : int, optional
            the maximum length of the extracted itemsets.
        """
        if self.log is None:
            raise RuntimeError("You must load a log before.")
        if not 0 <= min_support <= 1:
            raise RuntimeError("Min. support must be in range [0, 1].")

        # (Re)compute the one-hot encoding over the requested dimension before mining.
        self.log_encoding(dimension)
        if algorithm == 'fpgrowth':
            frequent_itemsets = fpgrowth(self.binary_encoded_log, min_support=min_support, use_colnames=True)
        elif algorithm == 'apriori':
            frequent_itemsets = apriori(self.binary_encoded_log, min_support=min_support, use_colnames=True)
        else:
            raise RuntimeError(f"{algorithm} algorithm not supported. Choose between fpgrowth and apriori")
        frequent_itemsets['length'] = frequent_itemsets['itemsets'].apply(len)
        if len_itemset is None:
            self.frequent_item_sets = frequent_itemsets
        else:
            self.frequent_item_sets = frequent_itemsets[(frequent_itemsets['length'] <= len_itemset)]

    def get_trace_keys(self) -> list[tuple[int, str]]:
        """
        Return the name of each trace, along with the position in the log.

        Returns
        -------
        trace_ids
            list containing the position in the log and the name of the trace.
        """
        if self.log is None:
            raise RuntimeError("You must load a log before.")
        trace_ids = []
        for trace_id, trace in enumerate(self.log):
            trace_ids.append((trace_id, trace.attributes["concept:name"]))
        return trace_ids

    def get_log_length(self) -> int:
        """
        Return the number of traces contained in the log.

        Returns
        -------
        log_length
            the length of the log.
        """
        if self.log is None:
            raise RuntimeError("You must load a log before.")
        return self.log_length

    def get_log(self) -> pm4py.objects.log.obj.EventLog:
        """
        Return the log previously fed in input.

        Returns
        -------
        log
            the input log.
        """
        if self.log is None:
            raise RuntimeError("You must load a log before.")
        return self.log

    def get_log_alphabet_payload(self) -> set[str]:
        """
        Return the set of resources that are in the log.

        Returns
        -------
        resources
            resource set.
        """
        if self.log is None:
            raise RuntimeError("You must load a log before.")
        resources = set()
        for trace in self.log:
            for event in trace:
                resources.add(event["org:group"])
        return resources

    def get_log_alphabet_activities(self) -> list[str]:
        """
        Return the activities that are in the log.

        Returns
        -------
        activities
            list of the distinct activity names appearing in the log (order not guaranteed).
        """
        if self.log is None:
            raise RuntimeError("You must load a log before.")
        activities = set()
        for trace in self.log:
            for event in trace:
                activities.add(event["concept:name"])
        return list(activities)

    def get_frequent_item_sets(self) -> pd.DataFrame:
        """
        Return the set of extracted frequent item sets.

        Returns
        -------
        frequent_item_sets
            the set of extracted frequent item sets.
        """
        if self.log is None:
            raise RuntimeError("You must load a log before.")
        if self.frequent_item_sets is None:
            raise RuntimeError("You must run the item set extraction algorithm before.")

        return self.frequent_item_sets

    def get_binary_encoded_log(self) -> pd.DataFrame:
        """
        Return the one-hot encoding of the log.

        Returns
        -------
        binary_encoded_log
            the one-hot encoded log.
        """
        if self.log is None:
            raise RuntimeError("You must load a log before.")
        # Fixed: this previously tested frequent_item_sets, so an encoded-but-not-mined log wrongly
        # raised, while an un-encoded log with mined item sets would have returned None.
        if self.binary_encoded_log is None:
            raise RuntimeError("You must run the log encoding before.")

        return self.binary_encoded_log

    # DECLARE UTILITIES
    def parse_decl_model(self, model_path: str) -> None:
        """
        Parse the input DECLARE model.

        Parameters
        ----------
        model_path : str
            File path where the DECLARE model is stored.
        """
        self.model = parse_decl_from_file(model_path)

    def get_supported_templates(self) -> tuple[str, ...]:
        """
        Return the DECLARE templates supported by Declare4Py.

        Returns
        -------
        supported_templates
            tuple of names of the supported DECLARE templates.
        """
        return self.supported_templates

    def get_model_activities(self) -> list[str]:
        """
        Return the activities contained in the DECLARE model.

        Returns
        -------
        activities
            list of activity names contained in the DECLARE model.
        """
        if self.model is None:
            raise RuntimeError("You must load a DECLARE model before.")

        return self.model.activities

    def get_model_constraints(self) -> list[str]:
        """
        Return the constraints contained in the DECLARE model.

        Returns
        -------
        constraints
            list of constraints contained in the DECLARE model.
        """
        if self.model is None:
            raise RuntimeError("You must load a DECLARE model before.")

        return self.model.get_decl_model_constraints()

    # PROCESS MINING TASKS
    def conformance_checking(self, consider_vacuity: bool) -> dict[tuple[int, str], dict[str, CheckerResult]]:
        """
        Performs conformance checking for the provided event log and DECLARE model.

        Parameters
        ----------
        consider_vacuity : bool
            True means that vacuously satisfied traces are considered as satisfied, violated otherwise.

        Returns
        -------
        conformance_checking_results
            dictionary where the key is a tuple containing trace position inside the log and the trace name, the value
            is a dictionary with keys the names of the constraints and values a CheckerResult object containing
            the number of pendings, activations, violations, fulfilments and the truth value of the trace for that
            constraint.
        """
        print("Computing conformance checking ...")
        if self.log is None:
            raise RuntimeError("You must load the log before checking the model.")
        if self.model is None:
            raise RuntimeError("You must load the DECLARE model before checking the model.")

        self.conformance_checking_results = {}
        for i, trace in enumerate(self.log):
            trc_res = check_trace_conformance(trace, self.model, consider_vacuity)
            self.conformance_checking_results[(i, trace.attributes["concept:name"])] = trc_res

        return self.conformance_checking_results

    def discovery(self, consider_vacuity: bool, max_declare_cardinality: int = 3, output_path: str = None) \
            -> dict[str, dict[tuple[int, str], CheckerResult]]:
        """
        Performs discovery of the supported DECLARE templates for the provided log by using the computed frequent item
        sets.

        Parameters
        ----------
        consider_vacuity : bool
            True means that vacuously satisfied traces are considered as satisfied, violated otherwise.

        max_declare_cardinality : int, optional
            the maximum cardinality that the algorithm checks for DECLARE templates supporting it (default 3).

        output_path : str, optional
            if specified, save the discovered constraints in a DECLARE model to the provided path.

        Returns
        -------
        discovery_results
            dictionary containing the results indexed by discovered constraints. The value is a dictionary with keys
            the tuples containing id and name of traces that satisfy the constraint. The values of this inner dictionary
            is a CheckerResult object containing the number of pendings, activations, violations, fulfilments.
        """
        print("Computing discovery ...")
        if self.log is None:
            raise RuntimeError("You must load a log before.")
        if self.frequent_item_sets is None:
            raise RuntimeError("You must discover frequent itemsets before.")
        if max_declare_cardinality <= 0:
            raise RuntimeError("Cardinality must be greater than 0.")

        self.discovery_results = {}

        # Unary templates are instantiated on 1-item sets, binary ones on 2-item sets (both orderings).
        for item_set in self.frequent_item_sets['itemsets']:
            length = len(item_set)

            if length == 1:
                for templ in Template.get_unary_templates():
                    constraint = {"template": templ, "attributes": ', '.join(item_set), "condition": ("", "")}
                    if not templ.supports_cardinality:
                        self.discovery_results |= discover_constraint(self.log, constraint, consider_vacuity)
                    else:
                        for i in range(max_declare_cardinality):
                            constraint['n'] = i + 1
                            self.discovery_results |= discover_constraint(self.log, constraint, consider_vacuity)

            elif length == 2:
                for templ in Template.get_binary_templates():
                    constraint = {"template": templ, "attributes": ', '.join(item_set), "condition": ("", "", "")}
                    self.discovery_results |= discover_constraint(self.log, constraint, consider_vacuity)

                    constraint['attributes'] = ', '.join(reversed(list(item_set)))
                    self.discovery_results |= discover_constraint(self.log, constraint, consider_vacuity)

        activities_decl_format = "activity " + "\nactivity ".join(self.get_log_alphabet_activities()) + "\n"
        if output_path is not None:
            with open(output_path, 'w') as f:
                f.write(activities_decl_format)
                f.write('\n'.join(self.discovery_results.keys()))

        return self.discovery_results

    def filter_discovery(self, min_support: float = 0, output_path: str = None) \
            -> dict[str, float]:
        """
        Filters discovery results by means of minimum support.

        Parameters
        ----------
        min_support : float, optional
            the minimum support that a discovered constraint needs to have to be included in the filtered result.

        output_path : str, optional
            if specified, save the filtered constraints in a DECLARE model to the provided path.

        Returns
        -------
        result
            dictionary mapping each discovered constraint whose support reaches the threshold to its support,
            i.e. the fraction of traces in the log that satisfy the constraint.
        """
        if self.log is None:
            raise RuntimeError("You must load a log before.")
        if self.discovery_results is None:
            raise RuntimeError("You must run a Discovery task before.")
        if not 0 <= min_support <= 1:
            raise RuntimeError("Min. support must be in range [0, 1].")

        result = {}

        for key, val in self.discovery_results.items():
            support = len(val) / len(self.log)
            if support >= min_support:
                result[key] = support

        if output_path is not None:
            with open(output_path, 'w') as f:
                f.write("activity " + "\nactivity ".join(self.get_log_alphabet_activities()) + "\n")
                f.write('\n'.join(result.keys()))

        return result

    def query_checking(self, consider_vacuity: bool,
                       template_str: str = None, max_declare_cardinality: int = 1,
                       activation: str = None, target: str = None,
                       act_cond: str = None, trg_cond: str = None, time_cond: str = None,
                       min_support: float = 1.0, return_first: bool = False) -> dict[str, dict[str, str]]:
        """
        Performs query checking for a (list of) template, activation activity and target activity. Optional
        activation, target and time conditions can be specified.

        Parameters
        ----------
        consider_vacuity : bool
            True means that vacuously satisfied traces are considered as satisfied, violated otherwise.

        template_str : str, optional
            if specified, the query checking is restricted on this DECLARE template. If not, the query checking is
            performed over the whole set of supported templates.

        max_declare_cardinality : int, optional
            the maximum cardinality that the algorithm checks for DECLARE templates supporting it (default 1).

        activation : str, optional
            if specified, the query checking is restricted on this activation activity. If not, the query checking
            considers in turn each activity of the log as activation.

        target : str, optional
            if specified, the query checking is restricted on this target activity. If not, the query checking
            considers in turn each activity of the log as target.

        act_cond : str, optional
            optional activation condition to evaluate. It has to be written by following the DECLARE standard format.

        trg_cond : str, optional
            optional target condition to evaluate. It has to be written by following the DECLARE standard format.

        time_cond : str, optional
            optional time condition to evaluate. It has to be written by following the DECLARE standard format.

        min_support : float, optional
            the minimum support that a constraint needs to have to be included in the result (default 1).

        return_first : bool, optional
            if True, the algorithm returns only the first queried constraint that is above the minimum support. If
            False, the algorithm returns all the constraints above the min. support (default False).

        Returns
        -------
        query_checking_results
            dictionary with keys the DECLARE constraints satisfying the assignments. The values are a structured
            representations of these constraints.
        """
        print("Computing query checking ...")

        is_template_given = bool(template_str)
        is_activation_given = bool(activation)
        is_target_given = bool(target)
        if not act_cond:
            act_cond = ""
        if not trg_cond:
            trg_cond = ""
        if not time_cond:
            time_cond = ""

        if not is_template_given and not is_activation_given and not is_target_given:
            raise RuntimeError("You must set at least one parameter among (template, activation, target).")
        if is_template_given:
            template = Template.get_template_from_string(template_str)
            if template is None:
                raise RuntimeError("You must insert a supported DECLARE template.")
            if not template.is_binary and is_target_given:
                raise RuntimeError("You cannot specify a target activity for unary templates.")
        if not 0 <= min_support <= 1:
            raise RuntimeError("Min. support must be in range [0, 1].")
        if max_declare_cardinality <= 0:
            raise RuntimeError("Cardinality must be greater than 0.")
        if self.log is None:
            raise RuntimeError("You must load a log before.")

        templates_to_check = list()
        if is_template_given:
            templates_to_check.append(template_str)
        else:
            templates_to_check += list(map(lambda t: t.templ_str, Template.get_binary_templates()))
            if not is_target_given:
                for template in Template.get_unary_templates():
                    if template.supports_cardinality:
                        for card in range(max_declare_cardinality):
                            templates_to_check.append(template.templ_str + str(card+1))
                    else:
                        templates_to_check.append(template.templ_str)

        activations_to_check = self.get_log_alphabet_activities() if activation is None else [activation]
        targets_to_check = self.get_log_alphabet_activities() if target is None else [target]
        activity_combos = tuple(filter(lambda c: c[0] != c[1], product(activations_to_check, targets_to_check)))

        self.query_checking_results = {}

        for template_str in templates_to_check:
            # Split a possible trailing cardinality digit off the template name, e.g. "Existence2".
            template_str, cardinality = re.search(r'(^.+?)(\d*$)', template_str).groups()
            template = Template.get_template_from_string(template_str)

            constraint = {"template": template}
            if cardinality:
                constraint['n'] = int(cardinality)

            if template.is_binary:
                constraint['condition'] = (act_cond, trg_cond, time_cond)
                for couple in activity_combos:
                    constraint['attributes'] = ', '.join(couple)

                    constraint_str = query_constraint(self.log, constraint, consider_vacuity, min_support)
                    if constraint_str:
                        res_value = {
                            "template": template_str, "activation": couple[0], "target": couple[1],
                            "act_cond": act_cond, "trg_cond": trg_cond, "time_cond": time_cond
                        }
                        self.query_checking_results[constraint_str] = res_value
                        if return_first:
                            return self.query_checking_results

            else:   # unary template
                constraint['condition'] = (act_cond, time_cond)
                for activity in activations_to_check:
                    constraint['attributes'] = activity

                    constraint_str = query_constraint(self.log, constraint, consider_vacuity, min_support)
                    if constraint_str:
                        res_value = {
                            "template": template_str, "activation": activity,
                            "act_cond": act_cond, "time_cond": time_cond
                        }
                        self.query_checking_results[constraint_str] = res_value
                        if return_first:
                            return self.query_checking_results

        return self.query_checking_results

    def filter_query_checking(self, queries) -> list[list[str]]:
        """
        The function outputs, for each constraint of the query checking result, only the elements of the constraint
        specified in the 'queries' list.

        Parameters
        ----------
        queries : list[str]
            elements of the constraint that the user want to retain from query checking result. Choose one (or more)
            elements among: 'template', 'activation', 'target'.

        Returns
        -------
        assignments
            list containing an entry for each constraint of query checking result. Each entry of the list is a list
            itself, containing the queried constraint elements.
        """
        if self.query_checking_results is None:
            raise RuntimeError("You must run a query checking task before.")
        if len(queries) == 0 or len(queries) > 3:
            raise RuntimeError("The list of queries has to contain at least one query and three queries as maximum")
        assignments = []
        for constraint in self.query_checking_results.keys():
            tmp_answer = []
            for query in queries:
                try:
                    tmp_answer.append(self.query_checking_results[constraint][query])
                except KeyError:
                    # NOTE(review): an invalid query name terminates the interpreter; consider raising instead.
                    print(f"{query} is not a valid query. Valid queries are template, activation, target.")
                    sys.exit(1)
            assignments.append(tmp_answer)
        return assignments

    # FUNCTIONS FOR PRINTING RESULTS ##############
    def print_conformance_results(self):
        if self.conformance_checking_results is None:
            raise RuntimeError("You must run conformance checking before!")

        for key, value in self.conformance_checking_results.items():
            print('Trace ID: ' + str(key[0]) + ' - "' + key[1] + '"')
            for item in value.items():
                print('\t' + item[1].state + '\ton ' + item[0])
            print()

Methods

def activities_log_projection(self) ‑> list[list[str]]

Return for each trace a time-ordered list of the activity names of the events.

Returns

projection
nested lists, the outer one addresses traces while the inner one contains event activity names.
Expand source code
def activities_log_projection(self) -> list[list[str]]:
    """
    Collect, per trace, the chronologically ordered activity names of its events.

    Returns
    -------
    projection
        nested lists, the outer one addresses traces while the inner one contains event activity names.

    Raises
    ------
    RuntimeError
        if no log has been loaded yet.
    """
    if self.log is None:
        raise RuntimeError("You must load a log before.")
    return [[event["concept:name"] for event in trace] for trace in self.log]
def compute_frequent_itemsets(self, min_support: float, dimension: str = 'act', algorithm: str = 'fpgrowth', len_itemset: int = None) ‑> None

Compute the most frequent item sets with a support greater or equal than 'min_support' with the given algorithm and over the given dimension.

Parameters

min_support : float
the minimum support of the returned item sets.
dimension : str, optional
choose 'act' to perform the encoding over activity names, 'payload' over resources (default 'act').
algorithm : str, optional
the algorithm for extracting frequent itemsets, choose between 'fpgrowth' (default) and 'apriori'.
len_itemset : int, optional
the maximum length of the extracted itemsets.
Expand source code
def compute_frequent_itemsets(self, min_support: float, dimension: str = 'act', algorithm: str = 'fpgrowth',
                              len_itemset: int = None) -> None:
    """
    Mine the item sets whose support is at least 'min_support', using the chosen algorithm over the
    chosen log dimension, and store them in 'frequent_item_sets'.

    Parameters
    ----------
    min_support : float
        the minimum support of the returned item sets.
    dimension : str, optional
        choose 'act' to perform the encoding over activity names, 'payload' over resources (default 'act').
    algorithm : str, optional
        the algorithm for extracting frequent itemsets, choose between 'fpgrowth' (default) and 'apriori'.
    len_itemset : int, optional
        the maximum length of the extracted itemsets.

    Raises
    ------
    RuntimeError
        if no log is loaded, the support is outside [0, 1], or the algorithm name is unknown.
    """
    if self.log is None:
        raise RuntimeError("You must load a log before.")
    if min_support < 0 or min_support > 1:
        raise RuntimeError("Min. support must be in range [0, 1].")

    # Refresh the one-hot encoding for the requested dimension before mining.
    self.log_encoding(dimension)
    if algorithm == 'fpgrowth':
        mined = fpgrowth(self.binary_encoded_log, min_support=min_support, use_colnames=True)
    elif algorithm == 'apriori':
        mined = apriori(self.binary_encoded_log, min_support=min_support, use_colnames=True)
    else:
        raise RuntimeError(f"{algorithm} algorithm not supported. Choose between fpgrowth and apriori")
    mined['length'] = mined['itemsets'].apply(len)
    # Optionally drop item sets longer than the requested maximum length.
    self.frequent_item_sets = mined if len_itemset is None else mined[mined['length'] <= len_itemset]
def conformance_checking(self, consider_vacuity: bool) ‑> dict[tuple[int, str], dict[str, CheckerResult]]

Performs conformance checking for the provided event log and DECLARE model.

Parameters

consider_vacuity : bool
True means that vacuously satisfied traces are considered as satisfied, violated otherwise.

Returns

conformance_checking_results
dictionary where the key is a list containing trace position inside the log and the trace name, the value is a dictionary with keys the names of the constraints and values a CheckerResult object containing the number of pendings, activations, violations, fulfilments and the truth value of the trace for that constraint.
Expand source code
def conformance_checking(self, consider_vacuity: bool) -> dict[tuple[int, str], dict[str, CheckerResult]]:
    """
    Performs conformance checking for the provided event log and DECLARE model.

    Parameters
    ----------
    consider_vacuity : bool
        True means that vacuously satisfied traces are considered as satisfied, violated otherwise.

    Returns
    -------
    conformance_checking_results
        dictionary where the key is a tuple containing trace position inside the log and the trace name, the value
        is a dictionary with keys the names of the constraints and values a CheckerResult object containing
        the number of pendings, activations, violations, fulfilments and the truth value of the trace for that
        constraint.

    Raises
    ------
    RuntimeError
        if the log or the DECLARE model has not been loaded yet.
    """
    print("Computing conformance checking ...")
    if self.log is None:
        raise RuntimeError("You must load the log before checking the model.")
    if self.model is None:
        raise RuntimeError("You must load the DECLARE model before checking the model.")

    # Each trace is checked independently; results are cached on the instance for later inspection.
    self.conformance_checking_results = {}
    for i, trace in enumerate(self.log):
        trc_res = check_trace_conformance(trace, self.model, consider_vacuity)
        self.conformance_checking_results[(i, trace.attributes["concept:name"])] = trc_res

    return self.conformance_checking_results
def discovery(self, consider_vacuity: bool, max_declare_cardinality: int = 3, output_path: str = None) -> dict[str, dict[tuple[int, str], CheckerResult]]

Performs discovery of the supported DECLARE templates for the provided log by using the computed frequent item sets.

Parameters

consider_vacuity : bool
True means that vacuously satisfied traces are considered as satisfied, violated otherwise.
max_declare_cardinality : int, optional
the maximum cardinality that the algorithm checks for DECLARE templates supporting it (default 3).
output_path : str, optional
if specified, save the discovered constraints in a DECLARE model to the provided path.

Returns

discovery_results
dictionary containing the results indexed by discovered constraints. The value is a dictionary with keys the tuples containing id and name of traces that satisfy the constraint. The values of this inner dictionary is a CheckerResult object containing the number of pendings, activations, violations, fulfilments.
Expand source code
def discovery(self, consider_vacuity: bool, max_declare_cardinality: int = 3, output_path: str = None) \
        -> "dict[str, dict[tuple[int, str], CheckerResult]]":
    """
    Performs discovery of the supported DECLARE templates for the provided log by using the computed frequent item
    sets.

    Parameters
    ----------
    consider_vacuity : bool
        True means that vacuously satisfied traces are considered as satisfied, violated otherwise.

    max_declare_cardinality : int, optional
        the maximum cardinality that the algorithm checks for DECLARE templates supporting it (default 3).

    output_path : str, optional
        if specified, save the discovered constraints in a DECLARE model to the provided path.

    Returns
    -------
    discovery_results
        dictionary containing the results indexed by discovered constraints. The value is a dictionary with keys
        the tuples containing id and name of traces that satisfy the constraint. The values of this inner dictionary
        is a CheckerResult object containing the number of pendings, activations, violations, fulfilments.

    Raises
    ------
    RuntimeError
        if the log has not been loaded, the frequent item sets have not been computed, or the
        cardinality is not positive.
    """
    # NOTE: the return annotation previously used colon-subscript syntax
    # (dict[A: B] -> dict[slice(A, B)]); fixed to comma syntax as a forward reference.
    print("Computing discovery ...")
    if self.log is None:
        raise RuntimeError("You must load a log before.")
    if self.frequent_item_sets is None:
        raise RuntimeError("You must discover frequent itemsets before.")
    if max_declare_cardinality <= 0:
        raise RuntimeError("Cardinality must be greater than 0.")

    self.discovery_results = {}

    for item_set in self.frequent_item_sets['itemsets']:
        length = len(item_set)

        # Singleton item sets feed the unary templates, pairs feed the binary ones;
        # larger item sets are skipped since no supported template takes >2 activities.
        if length == 1:
            for templ in Template.get_unary_templates():
                constraint = {"template": templ, "attributes": ', '.join(item_set), "condition": ("", "")}
                if not templ.supports_cardinality:
                    self.discovery_results |= discover_constraint(self.log, constraint, consider_vacuity)
                else:
                    # Try every cardinality from 1 up to the configured maximum.
                    for i in range(max_declare_cardinality):
                        constraint['n'] = i+1
                        self.discovery_results |= discover_constraint(self.log, constraint, consider_vacuity)

        elif length == 2:
            for templ in Template.get_binary_templates():
                # Binary templates are not symmetric: check both activity orderings.
                constraint = {"template": templ, "attributes": ', '.join(item_set), "condition": ("", "", "")}
                self.discovery_results |= discover_constraint(self.log, constraint, consider_vacuity)

                constraint['attributes'] = ', '.join(reversed(list(item_set)))
                self.discovery_results |= discover_constraint(self.log, constraint, consider_vacuity)

    activities_decl_format = "activity " + "\nactivity ".join(self.get_log_alphabet_activities()) + "\n"
    if output_path is not None:
        with open(output_path, 'w') as f:
            f.write(activities_decl_format)
            f.write('\n'.join(self.discovery_results.keys()))

    return self.discovery_results
def filter_discovery(self, min_support: float = 0, output_path: str = None) -> dict[str, float]

Filters discovery results by means of minimum support.

Parameters

min_support : float, optional
the minimum support that a discovered constraint needs to have to be included in the filtered result.
output_path : str, optional
if specified, save the filtered constraints in a DECLARE model to the provided path.

Returns

result
dictionary mapping each discovered constraint to its support, i.e. the fraction of traces in the log that satisfy it. Only constraints whose support is at least min_support are included.
Expand source code
def filter_discovery(self, min_support: float = 0, output_path: str = None) \
        -> dict[str, float]:
    """
    Filters discovery results by means of minimum support.

    Parameters
    ----------
    min_support : float, optional
        the minimum support that a discovered constraint needs to have to be included in the filtered result.

    output_path : str, optional
        if specified, save the filtered constraints in a DECLARE model to the provided path.

    Returns
    -------
    result
        dictionary mapping each discovered constraint to its support, i.e. the fraction of traces in the
        log that satisfy the constraint. Only constraints with support >= min_support are included.
        (The previous annotation/docstring claimed nested CheckerResult dicts, which did not match the code.)

    Raises
    ------
    RuntimeError
        if the log has not been loaded, discovery has not been run, or min_support is outside [0, 1].
    """
    if self.log is None:
        raise RuntimeError("You must load a log before.")
    if self.discovery_results is None:
        raise RuntimeError("You must run a Discovery task before.")
    if not 0 <= min_support <= 1:
        raise RuntimeError("Min. support must be in range [0, 1].")

    result = {}

    for constraint_str, checker_results in self.discovery_results.items():
        # Support = fraction of traces in which the constraint was discovered.
        support = len(checker_results) / len(self.log)
        if support >= min_support:
            result[constraint_str] = support

    if output_path is not None:
        with open(output_path, 'w') as f:
            f.write("activity " + "\nactivity ".join(self.get_log_alphabet_activities()) + "\n")
            f.write('\n'.join(result.keys()))

    return result
def filter_query_checking(self, queries) ‑> list[list[str]]

The function outputs, for each constraint of the query checking result, only the elements of the constraint specified in the 'queries' list.

Parameters

queries : list[str]
elements of the constraint that the user want to retain from query checking result. Choose one (or more) elements among: 'template', 'activation', 'target'.

Returns

assignments
list containing an entry for each constraint of query checking result. Each entry of the list is a list itself, containing the queried constraint elements.
Expand source code
def filter_query_checking(self, queries) -> list[list[str]]:
    """
    Project the query checking result onto the requested constraint elements.

    Parameters
    ----------
    queries : list[str]
        elements of the constraint that the user want to retain from query checking result. Choose one (or more)
        elements among: 'template', 'activation', 'target'.

    Returns
    -------
    assignments
        list containing an entry for each constraint of query checking result. Each entry of the list is a list
        itself, containing the queried constraint elements.
    """
    if self.query_checking_results is None:
        raise RuntimeError("You must run a query checking task before.")
    if not 1 <= len(queries) <= 3:
        raise RuntimeError("The list of queries has to contain at least one query and three queries as maximum")
    assignments = []
    for constraint_elems in self.query_checking_results.values():
        row = []
        for query in queries:
            if query not in constraint_elems:
                # Unknown query element: report it and abort, as in the CLI workflow.
                print(f"{query} is not a valid query. Valid queries are template, activation, target.")
                sys.exit(1)
            row.append(constraint_elems[query])
        assignments.append(row)
    return assignments
def get_binary_encoded_log(self) ‑> pandas.core.frame.DataFrame

Return the one-hot encoding of the log.

Returns

binary_encoded_log
the one-hot encoded log.
Expand source code
def get_binary_encoded_log(self) -> pd.DataFrame:
    """
    Getter for the one-hot encoded version of the input log.

    Returns
    -------
    binary_encoded_log
        the one-hot encoded log.
    """
    # Both a log and the extracted item sets must be available before the
    # encoding can be returned.
    preconditions = (
        (self.log, "You must load a log before."),
        (self.frequent_item_sets, "You must run the item set extraction algorithm before."),
    )
    for value, message in preconditions:
        if value is None:
            raise RuntimeError(message)

    return self.binary_encoded_log
def get_frequent_item_sets(self) ‑> pandas.core.frame.DataFrame

Return the set of extracted frequent item sets.

Returns

frequent_item_sets
the set of extracted frequent item sets.
Expand source code
def get_frequent_item_sets(self) -> pd.DataFrame:
    """
    Getter for the frequent item sets extracted from the input log.

    Returns
    -------
    frequent_item_sets
        the set of extracted frequent item sets.
    """
    if self.log is None:
        raise RuntimeError("You must load a log before.")
    item_sets = self.frequent_item_sets
    if item_sets is None:
        raise RuntimeError("You must run the item set extraction algorithm before.")

    return item_sets
def get_log(self) ‑> pm4py.objects.log.obj.EventLog

Return the log previously fed in input.

Returns

log
the input log.
Expand source code
def get_log(self) -> pm4py.objects.log.obj.EventLog:
    """
    Getter for the event log previously fed in input.

    Returns
    -------
    log
        the input log.
    """
    loaded = self.log
    if loaded is None:
        raise RuntimeError("You must load a log before.")
    return loaded
def get_log_alphabet_activities(self)

Return the set of activities that are in the log.

Returns

activities
activity set.
Expand source code
def get_log_alphabet_activities(self):
    """
    Collect the alphabet of activity names occurring in the log.

    Returns
    -------
    activities
        list of the distinct activity names (order is not guaranteed).
    """
    if self.log is None:
        raise RuntimeError("You must load a log before.")
    # Deduplicate across all events of all traces with a set comprehension.
    alphabet = {event["concept:name"] for trace in self.log for event in trace}
    return list(alphabet)
def get_log_alphabet_payload(self) ‑> set[str]

Return the set of resources that are in the log.

Returns

resources
resource set.
Expand source code
def get_log_alphabet_payload(self) -> set[str]:
    """
    Collect the alphabet of resources occurring in the log.

    Returns
    -------
    resources
        set of the distinct 'org:group' values found across all events.
    """
    if self.log is None:
        raise RuntimeError("You must load a log before.")
    # Deduplicate across all events of all traces with a set comprehension.
    return {event["org:group"] for trace in self.log for event in trace}
def get_log_length(self) ‑> int

Return the number of traces contained in the log.

Returns

log_length
the length of the log.
Expand source code
def get_log_length(self) -> int:
    """
    Getter for the number of traces contained in the log.

    Returns
    -------
    log_length
        the length of the log.
    """
    if self.log is not None:
        return self.log_length
    raise RuntimeError("You must load a log before.")
def get_model_activities(self) ‑> list[str]

Return the activities contained in the DECLARE model.

Returns

activities
list of activity names contained in the DECLARE model.
Expand source code
def get_model_activities(self) -> list[str]:
    """
    Getter for the activities contained in the DECLARE model.

    Returns
    -------
    activities
        list of activity names contained in the DECLARE model.
    """
    model = self.model
    if model is None:
        raise RuntimeError("You must load a DECLARE model before.")
    return model.activities
def get_model_constraints(self) ‑> list[str]

Return the constraints contained in the DECLARE model.

Returns

activities
list of constraints contained in the DECLARE model.
Expand source code
def get_model_constraints(self) -> list[str]:
    """
    Getter for the constraints contained in the DECLARE model.

    Returns
    -------
    activities
        list of constraints contained in the DECLARE model.
    """
    model = self.model
    if model is None:
        raise RuntimeError("You must load a DECLARE model before.")
    return model.get_decl_model_constraints()
def get_supported_templates(self) ‑> tuple[str, ...]

Return the DECLARE templates supported by Declare4Py.

Returns

supported_templates
tuple of names of the supported DECLARE templates.
Expand source code
def get_supported_templates(self) -> tuple[str, ...]:
    """
    Getter for the names of the DECLARE templates supported by Declare4Py.

    Returns
    -------
    supported_templates
        tuple of names of the supported DECLARE templates.
    """
    return self.supported_templates
def get_trace_keys(self) ‑> list[tuple[int, str]]

Return the name of each trace, along with the position in the log.

Returns

trace_ids
list containing the position in the log and the name of the trace.
Expand source code
def get_trace_keys(self) -> list[tuple[int, str]]:
    """
    Pair each trace name with its position inside the log.

    Returns
    -------
    trace_ids
        list of (position, trace name) tuples, in log order.
    """
    if self.log is None:
        raise RuntimeError("You must load a log before.")
    return [(idx, trace.attributes["concept:name"])
            for idx, trace in enumerate(self.log)]
def log_encoding(self, dimension: str = 'act') ‑> pandas.core.frame.DataFrame

Return the log binary encoding, i.e. the one-hot encoding stating whether an attribute is contained or not inside each trace of the log.

Parameters

dimension : str, optional
choose 'act' to perform the encoding over activity names, 'payload' over resources (default 'act').

Returns

binary_encoded_log
the one-hot encoding of the input log, made over activity names or resources depending on 'dimension' value.
Expand source code
def log_encoding(self, dimension: str = 'act') -> pd.DataFrame:
    """
    Compute, store and return the one-hot encoding of the log, i.e. a boolean table stating whether
    an attribute occurs inside each trace of the log.

    Parameters
    ----------
    dimension : str, optional
        choose 'act' to perform the encoding over activity names, 'payload' over resources (default 'act').

    Returns
    -------
    binary_encoded_log
        the one-hot encoding of the input log, made over activity names or resources depending on 'dimension' value.
    """
    if self.log is None:
        raise RuntimeError("You must load a log before.")
    encoder = TransactionEncoder()
    # Dispatch table mapping the dimension name to the matching projection method.
    projections = {
        'act': self.activities_log_projection,
        'payload': self.resources_log_projection,
    }
    if dimension not in projections:
        raise RuntimeError(f"{dimension} dimension not supported. Choose between 'act' and 'payload'")
    dataset = projections[dimension]()
    encoded_rows = encoder.fit(dataset).transform(dataset)
    self.binary_encoded_log = pd.DataFrame(encoded_rows, columns=encoder.columns_)
    return self.binary_encoded_log
def parse_decl_model(self, model_path) ‑> None

Parse the input DECLARE model.

Parameters

model_path : str
File path where the DECLARE model is stored.
Expand source code
def parse_decl_model(self, model_path: str) -> None:
    """
    Parse the input DECLARE model and store it in the 'model' attribute.

    Parameters
    ----------
    model_path : str
        File path where the DECLARE model is stored.
    """
    # Parsing is delegated to parse_decl_from_file, provided by the
    # wildcard-imported parsers module.
    self.model = parse_decl_from_file(model_path)
def parse_xes_log(self, log_path: str) ‑> None

Set the 'log' EventLog object and the 'log_length' integer by reading and parsing the log corresponding to given log file path.

Parameters

log_path : str
File path where the log is stored.
Expand source code
def parse_xes_log(self, log_path: str) -> None:
    """
    Read the XES log at the given path and set the 'log' and 'log_length' attributes accordingly.

    Parameters
    ----------
    log_path : str
        File path where the log is stored.
    """
    parsed_log = pm4py.read_xes(log_path)
    self.log = parsed_log
    self.log_length = len(parsed_log)
def print_conformance_results(self)
Expand source code
def print_conformance_results(self):
    """
    Pretty-print the conformance checking results, one section per trace with
    the state of every checked constraint.
    """
    if self.conformance_checking_results is None:
        raise RuntimeError("You must run conformance checking before!")

    for (trace_id, trace_name), checks in self.conformance_checking_results.items():
        print('Trace ID: ' + str(trace_id) + ' - "' + trace_name + '"')
        for constraint, res in checks.items():
            print('\t' + res.state + '\ton ' + constraint)
        print()
def query_checking(self, consider_vacuity: bool, template_str: str = None, max_declare_cardinality: int = 1, activation: str = None, target: str = None, act_cond: str = None, trg_cond: str = None, time_cond: str = None, min_support: float = 1.0, return_first: bool = False) -> dict[str, dict[str, str]]

Performs query checking for a (list of) template, activation activity and target activity. Optional activation, target and time conditions can be specified.

Parameters

consider_vacuity : bool
True means that vacuously satisfied traces are considered as satisfied, violated otherwise.
template_str : str, optional
if specified, the query checking is restricted on this DECLARE template. If not, the query checking is performed over the whole set of supported templates.
max_declare_cardinality : int, optional
the maximum cardinality that the algorithm checks for DECLARE templates supporting it (default 1).
activation : str, optional
if specified, the query checking is restricted on this activation activity. If not, the query checking considers in turn each activity of the log as activation.
target : str, optional
if specified, the query checking is restricted on this target activity. If not, the query checking considers in turn each activity of the log as target.
act_cond : str, optional
optional activation condition to evaluate. It has to be written by following the DECLARE standard format.
trg_cond : str, optional
optional target condition to evaluate. It has to be written by following the DECLARE standard format.
time_cond : str, optional
optional time condition to evaluate. It has to be written by following the DECLARE standard format.
min_support : float, optional
the minimum support that a constraint needs to have to be included in the result (default 1).
return_first : bool, optional
if True, the algorithm returns only the first queried constraint that is above the minimum support. If False, the algorithm returns all the constraints above the min. support (default False).

Returns

query_checking_results
dictionary with keys the DECLARE constraints satisfying the assignments. The values are a structured representations of these constraints.
Expand source code
def query_checking(self, consider_vacuity: bool,
                   template_str: str = None, max_declare_cardinality: int = 1,
                   activation: str = None, target: str = None,
                   act_cond: str = None, trg_cond: str = None, time_cond: str = None,
                   min_support: float = 1.0, return_first: bool = False) -> dict[str, dict[str, str]]:
    """
    Performs query checking for a (list of) template, activation activity and target activity. Optional
    activation, target and time conditions can be specified.

    Parameters
    ----------
    consider_vacuity : bool
        True means that vacuously satisfied traces are considered as satisfied, violated otherwise.

    template_str : str, optional
        if specified, the query checking is restricted on this DECLARE template. If not, the query checking is
        performed over the whole set of supported templates.

    max_declare_cardinality : int, optional
        the maximum cardinality that the algorithm checks for DECLARE templates supporting it (default 1).

    activation : str, optional
        if specified, the query checking is restricted on this activation activity. If not, the query checking
        considers in turn each activity of the log as activation.

    target : str, optional
        if specified, the query checking is restricted on this target activity. If not, the query checking
        considers in turn each activity of the log as target.

    act_cond : str, optional
        optional activation condition to evaluate. It has to be written by following the DECLARE standard format.

    trg_cond : str, optional
        optional target condition to evaluate. It has to be written by following the DECLARE standard format.

    time_cond : str, optional
        optional time condition to evaluate. It has to be written by following the DECLARE standard format.

    min_support : float, optional
        the minimum support that a constraint needs to have to be included in the result (default 1).

    return_first : bool, optional
        if True, the algorithm returns only the first queried constraint that is above the minimum support. If
        False, the algorithm returns all the constraints above the min. support (default False).

    Returns
    -------
    query_checking_results
        dictionary with keys the DECLARE constraints satisfying the assignments. The values are a structured
        representations of these constraints.

    Raises
    ------
    RuntimeError
        on invalid argument combinations (no template/activation/target given, unsupported template,
        target given for a unary template, min_support outside [0, 1], non-positive cardinality)
        or when the log has not been loaded.
    """
    # NOTE: the return annotation previously used colon-subscript syntax
    # (dict[str: dict[str: str]] -> dict[slice(...)]); fixed to comma syntax.
    print("Computing query checking ...")

    is_template_given = bool(template_str)
    is_activation_given = bool(activation)
    is_target_given = bool(target)
    # Normalize unspecified conditions to empty strings for the checkers.
    if not act_cond:
        act_cond = ""
    if not trg_cond:
        trg_cond = ""
    if not time_cond:
        time_cond = ""

    if not is_template_given and not is_activation_given and not is_target_given:
        raise RuntimeError("You must set at least one parameter among (template, activation, target).")
    if is_template_given:
        template = Template.get_template_from_string(template_str)
        if template is None:
            raise RuntimeError("You must insert a supported DECLARE template.")
        if not template.is_binary and is_target_given:
            raise RuntimeError("You cannot specify a target activity for unary templates.")
    if not 0 <= min_support <= 1:
        raise RuntimeError("Min. support must be in range [0, 1].")
    if max_declare_cardinality <= 0:
        raise RuntimeError("Cardinality must be greater than 0.")
    if self.log is None:
        raise RuntimeError("You must load a log before.")

    templates_to_check = list()
    if is_template_given:
        templates_to_check.append(template_str)
    else:
        templates_to_check += list(map(lambda t: t.templ_str, Template.get_binary_templates()))
        if not is_target_given:
            for template in Template.get_unary_templates():
                if template.supports_cardinality:
                    # Cardinality-aware templates are checked once per cardinality, encoded
                    # as a numeric suffix on the template name (e.g. "Existence2").
                    for card in range(max_declare_cardinality):
                        templates_to_check.append(template.templ_str + str(card+1))
                else:
                    templates_to_check.append(template.templ_str)

    activations_to_check = self.get_log_alphabet_activities() if activation is None else [activation]
    targets_to_check = self.get_log_alphabet_activities() if target is None else [target]
    # A constraint never relates an activity with itself: drop (a, a) pairs.
    activity_combos = tuple(filter(lambda c: c[0] != c[1], product(activations_to_check, targets_to_check)))

    self.query_checking_results = {}

    # The loop variable is intentionally NOT named template_str to avoid
    # shadowing the parameter (which the original code did).
    for templ_query in templates_to_check:
        # Split an optional trailing cardinality, e.g. "Existence2" -> ("Existence", "2").
        base_template_str, cardinality = re.search(r'(^.+?)(\d*$)', templ_query).groups()
        template = Template.get_template_from_string(base_template_str)

        constraint = {"template": template}
        if cardinality:
            constraint['n'] = int(cardinality)

        if template.is_binary:
            constraint['condition'] = (act_cond, trg_cond, time_cond)
            for couple in activity_combos:
                constraint['attributes'] = ', '.join(couple)

                constraint_str = query_constraint(self.log, constraint, consider_vacuity, min_support)
                if constraint_str:
                    res_value = {
                        "template": base_template_str, "activation": couple[0], "target": couple[1],
                        "act_cond": act_cond, "trg_cond": trg_cond, "time_cond": time_cond
                    }
                    self.query_checking_results[constraint_str] = res_value
                    if return_first:
                        return self.query_checking_results

        else:   # unary template
            constraint['condition'] = (act_cond, time_cond)
            for activity in activations_to_check:
                constraint['attributes'] = activity

                constraint_str = query_constraint(self.log, constraint, consider_vacuity, min_support)
                if constraint_str:
                    res_value = {
                        "template": base_template_str, "activation": activity,
                        "act_cond": act_cond, "time_cond": time_cond
                    }
                    self.query_checking_results[constraint_str] = res_value
                    if return_first:
                        return self.query_checking_results

    return self.query_checking_results
def resources_log_projection(self) ‑> list[list[str]]

Return for each trace a time-ordered list of the resources of the events.

Returns

projection
nested lists, the outer one addresses traces while the inner one contains event resources.
Expand source code
def resources_log_projection(self) -> list[list[str]]:
    """
    Return for each trace a time-ordered list of the resources of the events.

    Returns
    -------
    projection
        nested lists, the outer one addresses traces while the inner one contains event resources
        (the 'org:group' attribute). The previous docstring incorrectly said "activity names".

    Raises
    ------
    RuntimeError
        if the log has not been loaded.
    """
    if self.log is None:
        raise RuntimeError("You must load a log before.")
    # Preserve event order within each trace so the projection stays time-ordered.
    return [[event["org:group"] for event in trace] for trace in self.log]