Module uim.utils.analyser

Expand source code
# -*- coding: utf-8 -*-
# Copyright © 2023 Wacom. All rights reserved.
from abc import ABC, abstractmethod
from typing import List, Dict, Any, Optional

import numpy as np

from uim.model.helpers.policy import HandleMissingDataPolicy
from uim.model.ink import InkModel, InkTree, logger
from uim.model.inkdata.strokes import Stroke
from uim.model.inkinput.inputdata import InkSensorType, InputContext, SensorContext, SensorChannel
from uim.model.inkinput.sensordata import SensorData, ChannelData


def safe_zero_div(x: float, y: float) -> float:
    """
    Safely divide two numbers. If the denominator is zero, return zero.

    Parameters
    ----------
    x: float
        Numerator
    y: float
        Denominator

    Returns
    -------
    division: float
        x / y or 0. if y == 0.
    """
    # NumPy scalars do not raise ZeroDivisionError when divided by zero (they produce inf/nan and
    # emit a RuntimeWarning), so a scalar zero denominator is tested explicitly first.
    if np.isscalar(y) and y == 0:
        return 0.
    try:
        return x / y
    except ZeroDivisionError:
        # Kept for denominators that are not plain scalars but still signal division by zero.
        return 0.


class ModelAnalyzer(ABC):
    """
    Model analyzer
    ==============

    Abstract class for model analysis.

    Subclasses implement `analyze`. The static helpers of this class accumulate per-channel sensor
    statistics in a `stats` dictionary under the key ``'sensor_channels'``.
    """
    KNOWN_TYPE_PREDICATES: List[str] = ["@", "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"]
    """Known type predicates"""

    @staticmethod
    @abstractmethod
    def analyze(model: InkModel) -> Dict[str, Any]:
        """
        Analyze the model.

        Parameters
        ----------
        model: InkModel
            Ink model to analyze

        Returns
        -------
        summary: Dict[str, Any]
            Summary of the analysis
        """
        pass

    @staticmethod
    def __assume_view_type_predicate__(model: InkModel, view: InkTree) -> Optional[str]:
        """
        Determine which known type predicate is used for the root node of a view.

        Parameters
        ----------
        model: InkModel
            Ink model whose knowledge graph is queried
        view: InkTree
            View whose root URI is used as statement subject

        Returns
        -------
        predicate: Optional[str]
            Predicate of the first statement about the root whose predicate is listed in
            `KNOWN_TYPE_PREDICATES`, or None if there is no such statement.
        """
        for statement in model.knowledge_graph.all_statements_for(subject=view.root.uri):
            if statement.predicate in ModelAnalyzer.KNOWN_TYPE_PREDICATES:
                return statement.predicate
        return None

    @staticmethod
    def __extract_sensor_data_info__(model: InkModel, stroke: Stroke, stats: Dict[str, Any]):
        """
        Add the sensor channel values of one stroke to the statistics.

        For every channel of the stroke's sensor context an entry is created in
        ``stats['sensor_channels']`` (keyed by the channel type name) on first sight; its
        ``'strokes_count'`` is incremented and the channel values are appended to ``'values'``.
        Timestamp values are shifted by the timestamp of the sensor data. Lookup failures are logged
        and leave `stats` untouched.

        Parameters
        ----------
        model: InkModel
            Ink model providing sensor data and input configuration
        stroke: Stroke
            Stroke whose sensor data is evaluated
        stats: Dict[str, Any]
            Statistics dictionary; must already contain the key ``'sensor_channels'``
        """
        try:
            sd: SensorData = model.sensor_data.sensor_data_by_id(stroke.sensor_data_id)
            ic: InputContext = model.input_configuration.get_input_context(sd.input_context_id)
            sc: SensorContext = model.input_configuration.get_sensor_context(ic.sensor_context_id)
        except Exception as e:
            logger.error(f"Error while extracting sensor data info: {e}")
            return

        if sc is None:
            # Other code in this module treats a missing sensor context as None; handle it like
            # any other lookup failure instead of failing on the attribute access below.
            logger.error("Error while extracting sensor data info: sensor context not found")
            return

        channel_stats: Dict[str, Any] = stats['sensor_channels']
        for scc in sc.sensor_channels_contexts:
            for channel in scc.channels:
                type_name: str = channel.type.name
                entry = channel_stats.get(type_name)

                if entry is None:
                    entry = channel_stats[type_name] = {
                        'strokes_count': 0, 'percent': 0, 'values': [], "metric": channel.metric.name,
                        "resolution": channel.resolution, "precision": channel.precision,
                        "channel_min": channel.min, "channel_max": channel.max
                    }

                channel_data = sd.get_data_by_id(channel.id)
                if channel_data is None:
                    # The stroke has no data for this channel, so it is not counted for it.
                    continue

                entry['strokes_count'] += 1

                values = channel_data.values
                if channel.type == InkSensorType.TIMESTAMP:
                    values = [v + sd.timestamp for v in values]

                entry['values'].extend(values)

    @staticmethod
    def __post_process_sensor_channels_info__(stats):
        """
        Replace the collected raw values of every channel by summary statistics.

        Adds ``'min'``, ``'max'``, ``'mean'`` and ``'median'`` to each entry of
        ``stats['sensor_channels']`` and removes the ``'values'`` list. Channels without any value get
        NaN for all four statistics.

        Parameters
        ----------
        stats: Dict[str, Any]
            Statistics dictionary as filled by `__extract_sensor_data_info__`
        """
        for channel_stats in stats['sensor_channels'].values():
            values = channel_stats['values']
            if len(values) == 0:
                # min()/max() raise ValueError on an empty sequence; report "no data" as NaN.
                nan: float = float("nan")
                channel_stats["min"] = nan
                channel_stats["max"] = nan
                channel_stats["mean"] = nan
                channel_stats["median"] = nan
            else:
                channel_stats["min"] = min(values)
                channel_stats["max"] = max(values)
                channel_stats["mean"] = np.mean(values)
                channel_stats["median"] = np.median(values)
            channel_stats.pop('values', None)


def get_channel_data_values(ink_model: InkModel, stroke: Stroke, ink_sensor_type: InkSensorType) -> List[float]:
    """
    Collect the values of one sensor channel for a stroke.

    Timestamp values are returned with the timestamp of the stroke's sensor data added to each
    value; values of every other channel type are returned as a copy.

    Parameters
    ----------
    ink_model: InkModel
        Ink model
    stroke: Stroke
        Stroke
    ink_sensor_type: InkSensorType
        Sensor type

    Returns
    -------
    channel_data: List[float]
        Channel data values; empty if no channel data instance is available
    """
    instance: Optional[ChannelData] = get_channel_data_instance(ink_model, stroke, ink_sensor_type)
    if instance is None:
        return []

    if ink_sensor_type == InkSensorType.TIMESTAMP:
        sensor_data: SensorData = ink_model.sensor_data.sensor_data_by_id(stroke.sensor_data_id)
        return [value + sensor_data.timestamp for value in instance.values]

    return instance.values.copy()


def get_channel_data_instance(ink_model: InkModel, stroke: Stroke, ink_sensor_type: InkSensorType) \
        -> Optional[ChannelData]:
    """
    Get channel data instance for a given stroke and sensor type.

    Parameters
    ----------
    ink_model: InkModel
        Ink model
    stroke: Stroke
        Stroke
    ink_sensor_type: InkSensorType
        Sensor type

    Returns
    -------
    channel_data: Optional[ChannelData]
        Channel data instance, or None if the sensor data, the input context, the sensor context,
        the channel or the channel data is not available.
    """
    sd: Optional[SensorData] = ink_model.sensor_data.sensor_data_by_id(stroke.sensor_data_id)
    if sd is None:
        # Has to be checked before sd.input_context_id is read below.
        return None

    input_context: Optional[InputContext] = ink_model.input_configuration.get_input_context(sd.input_context_id)
    if input_context is None:
        return None

    sensor_context = ink_model.input_configuration.get_sensor_context(input_context.sensor_context_id)
    if sensor_context is None or not sensor_context.has_channel_type(ink_sensor_type):
        return None

    sc: Optional[SensorChannel] = sensor_context.get_channel_by_type(ink_sensor_type)
    if sc is None:
        return None

    # A single lookup is sufficient: the result is already None when there is no data.
    return sd.get_data_by_id(sc.id)


def as_strided_array(ink_model: InkModel, stroke: Stroke, handle_missing_data=HandleMissingDataPolicy.FILL_WITH_ZEROS) \
        -> Optional[List[float]]:
    """
    Flatten a stroke into a strided list of x, y, timestamp and pressure values.

    Parameters
    ----------
    ink_model: InkModel
        Ink model in which the sensor data of the stroke is looked up
    stroke: Stroke
        Stroke to convert
    handle_missing_data: HandleMissingDataPolicy
        Policy applied when the stroke has no timestamp or no pressure values: FILL_WITH_ZEROS and
        FILL_WITH_NAN substitute a constant value, SKIP_STROKE makes the function return None and
        THROW_EXCEPTION raises a ValueError.

    Returns
    -------
    points: Optional[List[float]]
        Flat list with four consecutive entries per point (x, y, timestamp, pressure), or None if
        the stroke is skipped.

    Raises
    ------
    ValueError
        If timestamp or pressure data is missing and the policy is THROW_EXCEPTION.
    """
    # Remove the first and last element, which are added by the spline producer
    xs: List[float] = stroke.splines_x[1:-1]
    ys: List[float] = stroke.splines_y[1:-1]

    if stroke.sensor_data_id is None:
        ts: List[float] = []
        ps: List[float] = []
    else:
        ts = get_channel_data_values(ink_model, stroke, InkSensorType.TIMESTAMP)
        ps = get_channel_data_values(ink_model, stroke, InkSensorType.PRESSURE)

    # Handle missing timestamp / pressure according to policy (timestamps take precedence)
    if len(ts) == 0 or len(ps) == 0:
        if handle_missing_data == HandleMissingDataPolicy.SKIP_STROKE:
            return None
        if handle_missing_data == HandleMissingDataPolicy.THROW_EXCEPTION:
            missing: str = "timestamp" if len(ts) == 0 else "pressure"
            raise ValueError(f"There is no {missing} data for this stroke.")

    if handle_missing_data == HandleMissingDataPolicy.FILL_WITH_ZEROS:
        filler: Optional[float] = 0
    elif handle_missing_data == HandleMissingDataPolicy.FILL_WITH_NAN:
        filler = float("NaN")
    else:
        # Any other policy value leaves missing channels empty.
        filler = None

    if len(ts) == 0 and filler is not None:
        ts = [filler] * len(xs)

    target_len: int = len(ts) if len(ts) > 0 else len(xs)

    if len(ps) == 0 and filler is not None:
        ps = [filler] * target_len

    xs = xs[0:target_len]
    ys = ys[0:target_len]

    sensor_data_mapping = stroke.sensor_data_mapping

    if len(sensor_data_mapping) == 0:  # Mapping is 1:1
        offset: int = stroke.sensor_data_offset
        sensor_data_mapping = range(offset, min(offset + len(xs), len(ts)))

    has_ts: bool = len(ts) > 0
    has_ps: bool = len(ps) > 0
    points: List[float] = []

    # NOTE(review): indices are not bounds-checked; an explicit mapping longer than the spline, or a
    # pressure channel shorter than the timestamp channel, raises IndexError - confirm intended.
    for i, map_i in enumerate(sensor_data_mapping):
        points.append(xs[i])
        points.append(ys[i])
        points.append(ts[map_i] if has_ts else 0)
        points.append(ps[map_i] if has_ps else 0)

    return points

Functions

def as_strided_array(ink_model: InkModel, stroke: Stroke, handle_missing_data=HandleMissingDataPolicy.FILL_WITH_ZEROS) ‑> List[float]
Expand source code
def as_strided_array(ink_model: InkModel, stroke: Stroke, handle_missing_data=HandleMissingDataPolicy.FILL_WITH_ZEROS) \
        -> Optional[List[float]]:
    """
    Flatten a stroke into a strided list of x, y, timestamp and pressure values.

    Parameters
    ----------
    ink_model: InkModel
        Ink model in which the sensor data of the stroke is looked up
    stroke: Stroke
        Stroke to convert
    handle_missing_data: HandleMissingDataPolicy
        Policy applied when the stroke has no timestamp or no pressure values: FILL_WITH_ZEROS and
        FILL_WITH_NAN substitute a constant value, SKIP_STROKE makes the function return None and
        THROW_EXCEPTION raises a ValueError.

    Returns
    -------
    points: Optional[List[float]]
        Flat list with four consecutive entries per point (x, y, timestamp, pressure), or None if
        the stroke is skipped.

    Raises
    ------
    ValueError
        If timestamp or pressure data is missing and the policy is THROW_EXCEPTION.
    """
    # Remove the first and last element, which are added by the spline producer
    xs: List[float] = stroke.splines_x[1:-1]
    ys: List[float] = stroke.splines_y[1:-1]

    if stroke.sensor_data_id is None:
        ts: List[float] = []
        ps: List[float] = []
    else:
        ts = get_channel_data_values(ink_model, stroke, InkSensorType.TIMESTAMP)
        ps = get_channel_data_values(ink_model, stroke, InkSensorType.PRESSURE)

    # Handle missing timestamp / pressure according to policy (timestamps take precedence)
    if len(ts) == 0 or len(ps) == 0:
        if handle_missing_data == HandleMissingDataPolicy.SKIP_STROKE:
            return None
        if handle_missing_data == HandleMissingDataPolicy.THROW_EXCEPTION:
            missing: str = "timestamp" if len(ts) == 0 else "pressure"
            raise ValueError(f"There is no {missing} data for this stroke.")

    if handle_missing_data == HandleMissingDataPolicy.FILL_WITH_ZEROS:
        filler: Optional[float] = 0
    elif handle_missing_data == HandleMissingDataPolicy.FILL_WITH_NAN:
        filler = float("NaN")
    else:
        # Any other policy value leaves missing channels empty.
        filler = None

    if len(ts) == 0 and filler is not None:
        ts = [filler] * len(xs)

    target_len: int = len(ts) if len(ts) > 0 else len(xs)

    if len(ps) == 0 and filler is not None:
        ps = [filler] * target_len

    xs = xs[0:target_len]
    ys = ys[0:target_len]

    sensor_data_mapping = stroke.sensor_data_mapping

    if len(sensor_data_mapping) == 0:  # Mapping is 1:1
        offset: int = stroke.sensor_data_offset
        sensor_data_mapping = range(offset, min(offset + len(xs), len(ts)))

    has_ts: bool = len(ts) > 0
    has_ps: bool = len(ps) > 0
    points: List[float] = []

    # NOTE(review): indices are not bounds-checked; an explicit mapping longer than the spline, or a
    # pressure channel shorter than the timestamp channel, raises IndexError - confirm intended.
    for i, map_i in enumerate(sensor_data_mapping):
        points.append(xs[i])
        points.append(ys[i])
        points.append(ts[map_i] if has_ts else 0)
        points.append(ps[map_i] if has_ps else 0)

    return points
def get_channel_data_instance(ink_model: InkModel, stroke: Stroke, ink_sensor_type: InkSensorType) ‑> Optional[ChannelData]

Get channel data instance for a given stroke and sensor type.

Parameters


ink_model : InkModel
Ink model
stroke : Stroke
Stroke
ink_sensor_type : InkSensorType
Sensor type

Returns

channel_data : Optional[ChannelData]
Channel data instance
Expand source code
def get_channel_data_instance(ink_model: InkModel, stroke: Stroke, ink_sensor_type: InkSensorType) \
        -> Optional[ChannelData]:
    """
    Get channel data instance for a given stroke and sensor type.

    Parameters
    ----------
    ink_model: InkModel
        Ink model
    stroke: Stroke
        Stroke
    ink_sensor_type: InkSensorType
        Sensor type

    Returns
    -------
    channel_data: Optional[ChannelData]
        Channel data instance, or None if the sensor data, the input context, the sensor context,
        the channel or the channel data is not available.
    """
    sd: Optional[SensorData] = ink_model.sensor_data.sensor_data_by_id(stroke.sensor_data_id)
    if sd is None:
        # Has to be checked before sd.input_context_id is read below.
        return None

    input_context: Optional[InputContext] = ink_model.input_configuration.get_input_context(sd.input_context_id)
    if input_context is None:
        return None

    sensor_context = ink_model.input_configuration.get_sensor_context(input_context.sensor_context_id)
    if sensor_context is None or not sensor_context.has_channel_type(ink_sensor_type):
        return None

    sc: Optional[SensorChannel] = sensor_context.get_channel_by_type(ink_sensor_type)
    if sc is None:
        return None

    # A single lookup is sufficient: the result is already None when there is no data.
    return sd.get_data_by_id(sc.id)
def get_channel_data_values(ink_model: InkModel, stroke: Stroke, ink_sensor_type: InkSensorType) ‑> List[float]

Get channel data values for a given stroke and sensor type.

Parameters


ink_model : InkModel
Ink model
stroke : Stroke
Stroke
ink_sensor_type : InkSensorType
Sensor type

Returns

channel_data : List[float]
Channel data values
Expand source code
def get_channel_data_values(ink_model: InkModel, stroke: Stroke, ink_sensor_type: InkSensorType) -> List[float]:
    """
    Collect the values of one sensor channel for a stroke.

    Timestamp values are returned with the timestamp of the stroke's sensor data added to each
    value; values of every other channel type are returned as a copy.

    Parameters
    ----------
    ink_model: InkModel
        Ink model
    stroke: Stroke
        Stroke
    ink_sensor_type: InkSensorType
        Sensor type

    Returns
    -------
    channel_data: List[float]
        Channel data values; empty if no channel data instance is available
    """
    instance: Optional[ChannelData] = get_channel_data_instance(ink_model, stroke, ink_sensor_type)
    if instance is None:
        return []

    if ink_sensor_type == InkSensorType.TIMESTAMP:
        sensor_data: SensorData = ink_model.sensor_data.sensor_data_by_id(stroke.sensor_data_id)
        return [value + sensor_data.timestamp for value in instance.values]

    return instance.values.copy()
def safe_zero_div(x: float, y: float) ‑> float

Safely divide two numbers. If the denominator is zero, return zero.

Parameters


x : float
Numerator
y : float
Denominator

Returns

division : float
x / y or 0. if y == 0.
Expand source code
def safe_zero_div(x: float, y: float) -> float:
    """
    Safely divide two numbers. If the denominator is zero, return zero.

    Parameters
    ----------
    x: float
        Numerator
    y: float
        Denominator

    Returns
    -------
    division: float
        x / y or 0. if y == 0.
    """
    # NumPy scalars do not raise ZeroDivisionError when divided by zero (they produce inf/nan and
    # emit a RuntimeWarning), so a scalar zero denominator is tested explicitly first.
    if np.isscalar(y) and y == 0:
        return 0.
    try:
        return x / y
    except ZeroDivisionError:
        # Kept for denominators that are not plain scalars but still signal division by zero.
        return 0.

Classes

class ModelAnalyzer

Model analyzer

Abstract class for model analysis.

Expand source code
class ModelAnalyzer(ABC):
    """
    Model analyzer
    ==============

    Abstract class for model analysis.

    Subclasses implement `analyze`. The static helpers of this class accumulate per-channel sensor
    statistics in a `stats` dictionary under the key ``'sensor_channels'``.
    """
    KNOWN_TYPE_PREDICATES: List[str] = ["@", "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"]
    """Known type predicates"""

    @staticmethod
    @abstractmethod
    def analyze(model: InkModel) -> Dict[str, Any]:
        """
        Analyze the model.

        Parameters
        ----------
        model: InkModel
            Ink model to analyze

        Returns
        -------
        summary: Dict[str, Any]
            Summary of the analysis
        """
        pass

    @staticmethod
    def __assume_view_type_predicate__(model: InkModel, view: InkTree) -> Optional[str]:
        """
        Determine which known type predicate is used for the root node of a view.

        Parameters
        ----------
        model: InkModel
            Ink model whose knowledge graph is queried
        view: InkTree
            View whose root URI is used as statement subject

        Returns
        -------
        predicate: Optional[str]
            Predicate of the first statement about the root whose predicate is listed in
            `KNOWN_TYPE_PREDICATES`, or None if there is no such statement.
        """
        for statement in model.knowledge_graph.all_statements_for(subject=view.root.uri):
            if statement.predicate in ModelAnalyzer.KNOWN_TYPE_PREDICATES:
                return statement.predicate
        return None

    @staticmethod
    def __extract_sensor_data_info__(model: InkModel, stroke: Stroke, stats: Dict[str, Any]):
        """
        Add the sensor channel values of one stroke to the statistics.

        For every channel of the stroke's sensor context an entry is created in
        ``stats['sensor_channels']`` (keyed by the channel type name) on first sight; its
        ``'strokes_count'`` is incremented and the channel values are appended to ``'values'``.
        Timestamp values are shifted by the timestamp of the sensor data. Lookup failures are logged
        and leave `stats` untouched.

        Parameters
        ----------
        model: InkModel
            Ink model providing sensor data and input configuration
        stroke: Stroke
            Stroke whose sensor data is evaluated
        stats: Dict[str, Any]
            Statistics dictionary; must already contain the key ``'sensor_channels'``
        """
        try:
            sd: SensorData = model.sensor_data.sensor_data_by_id(stroke.sensor_data_id)
            ic: InputContext = model.input_configuration.get_input_context(sd.input_context_id)
            sc: SensorContext = model.input_configuration.get_sensor_context(ic.sensor_context_id)
        except Exception as e:
            logger.error(f"Error while extracting sensor data info: {e}")
            return

        if sc is None:
            # Other code in this module treats a missing sensor context as None; handle it like
            # any other lookup failure instead of failing on the attribute access below.
            logger.error("Error while extracting sensor data info: sensor context not found")
            return

        channel_stats: Dict[str, Any] = stats['sensor_channels']
        for scc in sc.sensor_channels_contexts:
            for channel in scc.channels:
                type_name: str = channel.type.name
                entry = channel_stats.get(type_name)

                if entry is None:
                    entry = channel_stats[type_name] = {
                        'strokes_count': 0, 'percent': 0, 'values': [], "metric": channel.metric.name,
                        "resolution": channel.resolution, "precision": channel.precision,
                        "channel_min": channel.min, "channel_max": channel.max
                    }

                channel_data = sd.get_data_by_id(channel.id)
                if channel_data is None:
                    # The stroke has no data for this channel, so it is not counted for it.
                    continue

                entry['strokes_count'] += 1

                values = channel_data.values
                if channel.type == InkSensorType.TIMESTAMP:
                    values = [v + sd.timestamp for v in values]

                entry['values'].extend(values)

    @staticmethod
    def __post_process_sensor_channels_info__(stats):
        """
        Replace the collected raw values of every channel by summary statistics.

        Adds ``'min'``, ``'max'``, ``'mean'`` and ``'median'`` to each entry of
        ``stats['sensor_channels']`` and removes the ``'values'`` list. Channels without any value get
        NaN for all four statistics.

        Parameters
        ----------
        stats: Dict[str, Any]
            Statistics dictionary as filled by `__extract_sensor_data_info__`
        """
        for channel_stats in stats['sensor_channels'].values():
            values = channel_stats['values']
            if len(values) == 0:
                # min()/max() raise ValueError on an empty sequence; report "no data" as NaN.
                nan: float = float("nan")
                channel_stats["min"] = nan
                channel_stats["max"] = nan
                channel_stats["mean"] = nan
                channel_stats["median"] = nan
            else:
                channel_stats["min"] = min(values)
                channel_stats["max"] = max(values)
                channel_stats["mean"] = np.mean(values)
                channel_stats["median"] = np.median(values)
            channel_stats.pop('values', None)

Ancestors

  • abc.ABC

Subclasses

Class variables

var KNOWN_TYPE_PREDICATES : List[str]

Known type predicates

Static methods

def analyze(model: InkModel) ‑> Dict[str, Any]

Analyze the model.

Parameters


model : InkModel
Ink model to analyze

Returns

summary : Dict[str, Any]
Summary of the analysis
Expand source code
@staticmethod
@abstractmethod
def analyze(model: InkModel) -> Dict[str, Any]:
    """
    Produce an analysis summary for an ink model.

    Abstract method; this base implementation performs no work and returns None.

    Parameters
    ----------
    model: InkModel
        Ink model to analyze

    Returns
    -------
    summary: Dict[str, Any]
        Summary of the analysis
    """
    return None