Module uim.utils.statistics
Expand source code
# -*- coding: utf-8 -*-
# Copyright © 2023 Wacom. All rights reserved.
import re
import statistics
from typing import Dict, Any, Optional, List
from uim.utils.analyser import ModelAnalyzer, safe_zero_div, as_strided_array
from uim.model.helpers.treeiterator import PreOrderEnumerator
from uim.model.ink import InkModel, InkTree, logger
from uim.model.inkdata.strokes import Stroke, Style
from uim.model.inkinput.inputdata import InputContext, Environment, SensorContext, InputDevice, \
InkInputProvider
from uim.model.inkinput.sensordata import SensorData
from uim.model.semantics.node import StrokeGroupNode
from uim.model.semantics.structures import BoundingBox
from uim.model.semantics.schema import TripleStore
class StatisticsAnalyzer(ModelAnalyzer):
"""
Statistics analyzer
===================
Analyze the model and compute statistics.
"""
@staticmethod
def merge_stats(*stats):
    """
    Placeholder for merging several statistics dictionaries.

    The method is not implemented: every argument is ignored and ``None`` is returned.

    Parameters
    ----------
    stats
        Statistics dictionaries; currently unused.
    """
    return None
@staticmethod
def summarize(stats, verbose=False):
    """
    Placeholder for summarizing a statistics dictionary.

    The method is not implemented: both arguments are ignored and ``None`` is returned.

    Parameters
    ----------
    stats
        Statistics dictionary; currently unused.
    verbose
        Verbosity flag; currently unused.
    """
    return None
@staticmethod
def analyze(model: InkModel, ignore_predicates: Optional[List[str]] = None,
            ignore_properties: Optional[List[str]] = None):
    """
    Collect the statistics of one ink model.

    Parameters
    ----------
    model: InkModel
        Ink model to analyze.
    ignore_predicates: Optional[List[str]]
        Regular expressions; they are forwarded to the view and knowledge graph extraction, which leave out
        predicates matching one of them.
    ignore_properties: Optional[List[str]]
        Regular expressions; they are forwarded to the property extraction, which leaves out matching
        properties.

    Returns
    -------
    stats: Dict[str, Any]
        The statistics dictionary built and post-processed by the helpers called below.
    """
    analyzer = StatisticsAnalyzer
    # Model-wide values, computed in the same order in which they are stored in the dictionary
    properties_info = analyzer.__extract_properties_info(model, ignore_properties)
    sampling_rate = analyzer.__detect_sampling_rate__(model)
    document_bounds = analyzer.__compute_document_bounds__(model)
    version = model.version
    stats: Dict[str, Any] = {
        "envs": {},
        "input_devices": {},
        "input_providers": {},
        "brushes": {},
        "properties": properties_info,
        "sampling_rate": sampling_rate,
        "document_bounds": document_bounds,
        "sensor_channels": {},
        "knowledge_graph": {},
        "points_count": {'stroke_points': [], 'total': 0},
        "views": {},
        "uim_version": f"{version.major}.{version.minor}.{version.patch}",
        "strokes_count": len(model.strokes),
    }
    # Register environments, devices, providers and brushes with zeroed counters
    analyzer.__preload_stats_items__(model, stats)
    # Per-stroke accumulation
    for stroke in model.strokes:
        analyzer.__extract_stroke_info__(stroke, stats)
        analyzer.__extract_input_configuration__(model, stroke, stats)
        # NOTE(review): not defined in this class; presumably inherited from ModelAnalyzer - confirm
        analyzer.__extract_sensor_data_info__(model, stroke, stats)
        analyzer.__extract_brushes_information(stroke, stats)
    # NOTE(review): also not defined in this class; presumably inherited from ModelAnalyzer - confirm
    analyzer.__post_process_sensor_channels_info__(stats)
    # Semantic information: views first, then the complete knowledge graph
    analyzer.__extract_views_info__(model, stats, ignore_predicates)
    analyzer.__extract_kg_info__(model, stats, ignore_predicates)
    # Derive percentages and aggregates from the accumulated counters
    analyzer.__post_process_stats__(stats)
    return stats
@staticmethod
def __extract_stroke_info__(stroke: Stroke, stats: Dict[str, Any]):
    """
    Add the point count of one stroke to the statistics.

    Parameters
    ----------
    stroke: Stroke
        Stroke whose ``points_count`` is recorded.
    stats: Dict[str, Any]
        Statistics dictionary; its ``points_count`` entry is updated in place.
    """
    counters: Dict[str, Any] = stats['points_count']
    # Keep the individual value for the min / max / mean computation done during post-processing
    counters['stroke_points'].append(stroke.points_count)
    counters['total'] = counters['total'] + stroke.points_count
@staticmethod
def __extract_properties_info(model: InkModel, ignore_properties: Optional[List[str]] = None):
    """
    Extracts properties information from the given model.

    Parameters
    ----------
    model: InkModel
        The model to extract information from.
    ignore_properties: Optional[List[str]]
        A list of regular expressions; properties whose name matches one of them (``re.match``, i.e. anchored
        at the start of the name) are ignored.

    Returns
    -------
    result: Dict[str, Any]
        One entry per kept property of the form
        ``{'documents_count': 1, 'values': {<value>: {'count': 1}}}``.
    """
    # Compile every pattern once instead of once per property
    patterns = [re.compile(pattern) for pattern in ignore_properties or []]
    result: Dict[str, Any] = {}
    # dict() keeps only the last value when a property name occurs more than once
    for prop, value in dict(model.properties).items():
        if any(pattern.match(prop) for pattern in patterns):
            continue
        # The model is the single document in which this property was seen. The counter used to stay at 0,
        # which made every percentage derived from it a division by zero.
        result[prop] = {'documents_count': 1, 'values': {value: {"count": 1}}}
    return result
@staticmethod
def __post_process_stats__(stats):
    """
    Derive percentages and aggregated values from the accumulated counters.

    The dictionary is modified in place:

    - every brush, environment, input device, input provider, sensor channel and view leaf class receives a
      ``percent`` value relative to ``stats['strokes_count']``,
    - every property value receives a ``percent`` value relative to the ``documents_count`` of its property,
    - the ``sampling_rates`` list of each input provider is replaced by its rounded mean ``sampling_rate``
      (``0`` if no rate was collected),
    - the ``stroke_points`` list is replaced by ``min``, ``max``, ``mean``, ``std`` and ``median``.

    Parameters
    ----------
    stats
        Statistics dictionary with accumulated counters.
    """
    strokes_count: int = stats['strokes_count']

    def percent_of(part, whole):
        # Rounded percentage; the zero-denominator case is delegated to safe_zero_div
        return round(safe_zero_div(part, whole) * 100, 2)

    for stat_type in ('brushes', 'envs', 'input_devices', 'sensor_channels'):
        for entry in stats[stat_type].values():
            entry['percent'] = percent_of(entry['strokes_count'], strokes_count)
    for provider in stats['input_providers'].values():
        provider['percent'] = percent_of(provider['strokes_count'], strokes_count)
        # The accumulator is removed in every case, not only when it contains values
        rates = provider.pop('sampling_rates', None)
        provider['sampling_rate'] = round(statistics.mean(rates), 2) if rates else 0
    for view in stats['views'].values():
        for leaf_class in view['leaf_classes'].values():
            leaf_class['percent'] = percent_of(leaf_class['strokes_count'], strokes_count)
    for prop in stats['properties'].values():
        for value_info in prop['values'].values():
            value_info['percent'] = percent_of(value_info['count'], prop['documents_count'])
    # Stroke stats
    points = stats['points_count']
    samples = points.pop('stroke_points')
    if samples:
        points['min'] = min(samples)
        points['max'] = max(samples)
        points['mean'] = round(statistics.mean(samples), 2)
        # statistics.stdev needs at least two values; a model with a single stroke has no spread
        points['std'] = round(statistics.stdev(samples), 2) if len(samples) > 1 else 0
        points['median'] = round(statistics.median(samples), 2)
    else:
        for key in ('min', 'max', 'mean', 'std', 'median'):
            points[key] = 0
@staticmethod
def __preload_stats_items__(model: InkModel, stats: Dict[str, Any]):
    """
    Register all environments, input devices, input providers and brushes of the model with zeroed counters.

    Additionally ``stats['document_stats']`` is initialised with the area of ``stats['document_bounds']``.

    Parameters
    ----------
    model: InkModel
        The model whose input configuration and brushes are registered.
    stats: Dict[str, Any]
        Statistics dictionary; updated in place. ``document_bounds`` must already be present.
    """
    config = model.input_configuration
    for env in config.environments:
        env_stats: Dict[str, Any] = dict(env.properties)
        env_stats['strokes_count'] = 0
        stats['envs'][f'env-{env.id}'] = env_stats
        if 'user.agent' in env_stats:
            # NOTE(review): the lookups below assume a nested mapping; a plain string value ends up in the
            # fallback branches - confirm the expected format of 'user.agent'
            agent = env_stats['user.agent']
            try:
                env_stats['platform.name'] = agent['platform']['name']
                env_stats['platform.version'] = agent['platform']['version']
                env_stats['os.name'] = agent['os']['name']
            except (LookupError, TypeError) as e:
                # Best effort: report through the module logger instead of printing to stdout
                logger.warning(f"Could not extract platform information of environment {env.id}: {e}")
            try:
                env_stats['browser.name'] = agent['browser']['name']
                env_stats['browser.version'] = agent['browser']['version']
            except (LookupError, TypeError):
                env_stats['browser.name'] = 'unknown'
                env_stats['browser.version'] = 'unknown'
    for dev in config.devices:
        stats['input_devices'][f'dev-{dev.id}'] = {"strokes_count": 0}
    for provider in config.ink_input_providers:
        stats['input_providers'][f'prov-{provider.id}'] = {"strokes_count": 0, "sampling_rates": []}
    # Vector brushes first, so that a raster brush with the same name overrides the entry as before
    for brushes in (model.brushes.vector_brushes, model.brushes.raster_brushes):
        for brush in brushes:
            stats['brushes'][brush.name] = {"strokes_count": 0}
    area = stats['document_bounds']['width'] * stats['document_bounds']['height']
    stats['document_stats'] = {"min_area": area, "max_area": area}
@staticmethod
def __extract_views_info__(model: InkModel, stats: Dict[str, Any], ignore_predicates: Optional[List[str]] = None):
    """
    Collect per-view statistics and store them in ``stats['views']`` under the name of the view.

    For every view whose assumed type predicate is not ``"unknown"`` the tree is traversed with a
    ``PreOrderEnumerator`` and the following is collected:

    - ``statements_count``: number of knowledge graph statements whose subject is a node of the view,
    - ``predicates``: occurrences per predicate, leaving out the ignored ones,
    - ``leaf_classes``: for stroke groups without nested stroke groups, the number of children per
      semantic type (object of the first statement with the assumed type predicate).

    Views with an unknown type predicate are stored with empty counters.

    Parameters
    ----------
    model: InkModel
        The model providing the views and the knowledge graph.
    stats: Dict[str, Any]
        Statistics dictionary; ``stats['views']`` is updated in place.
    ignore_predicates: Optional[List[str]]
        Regular expressions; predicates matching one of them (``re.match``) are not counted.
    """
    kg: TripleStore = model.knowledge_graph
    # Compile every pattern once instead of once per statement
    ignored = [re.compile(pattern) for pattern in ignore_predicates or []]
    for view in model.views:
        type_predicate = ModelAnalyzer.__assume_view_type_predicate__(model, view)
        view_info: Dict[str, Any] = {
            "assumed_type_predicate": type_predicate,
            "statements_count": 0,
            "predicates": {},
            "leaf_classes": {}
        }
        if type_predicate != "unknown":
            for node in PreOrderEnumerator(view.root):
                # Calculate predicates per view
                statements = kg.all_statements_for(subject=node.uri)
                # The counter used to be initialised but never updated
                view_info["statements_count"] += len(statements)
                for statement in statements:
                    predicate = statement.predicate
                    if any(pattern.match(predicate) for pattern in ignored):
                        continue
                    occurrences = view_info["predicates"].setdefault(predicate, {"occurrence": 0})
                    occurrences["occurrence"] += 1
                # Only stroke groups without nested stroke groups count as leaf classes
                if type(node) is not StrokeGroupNode:
                    continue
                if any(type(child) is StrokeGroupNode for child in node.children):
                    continue
                typed = kg.all_statements_for(subject=node.uri, predicate=type_predicate)
                if len(typed) > 0:
                    sem_type = typed[0].object
                    leaf_class = view_info["leaf_classes"].setdefault(sem_type, {"strokes_count": 0, 'percent': 0})
                    leaf_class["strokes_count"] += len(node.children)
        stats["views"][view.name] = view_info
@staticmethod
def __extract_kg_info__(model: InkModel, stats, ignore_predicates=None):
    """
    Collect statistics on the knowledge graph of the model.

    ``stats['knowledge_graph']`` receives ``statements_count`` (all statements, including ignored ones) and
    ``predicates`` (occurrences per predicate, leaving out the ignored ones).

    Parameters
    ----------
    model: InkModel
        The model providing the knowledge graph.
    stats
        Statistics dictionary; ``stats['knowledge_graph']`` must exist and is updated in place.
    ignore_predicates
        Optional list of regular expressions; predicates matching one of them (``re.match``) are not counted.
    """
    kg: TripleStore = model.knowledge_graph
    # Compile every pattern once instead of once per statement
    ignored = [re.compile(pattern) for pattern in ignore_predicates or []]
    predicates: Dict[str, Dict[str, int]] = {}
    stats["knowledge_graph"]["statements_count"] = len(kg.statements)
    stats["knowledge_graph"]["predicates"] = predicates
    for statement in kg.statements:
        predicate = statement.predicate
        if any(pattern.match(predicate) for pattern in ignored):
            continue
        predicates.setdefault(predicate, {"occurrence": 0})["occurrence"] += 1
@staticmethod
def __extract_brushes_information(stroke: Stroke, stats):
    """
    Count the stroke for the brush referenced by its style.

    Parameters
    ----------
    stroke: Stroke
        The stroke whose ``style.brush_uri`` is evaluated.
    stats
        Statistics dictionary; ``stats['brushes']`` is updated in place.
    """
    style: Optional[Style] = stroke.style
    brush_uri = style.brush_uri if style is not None else None
    if brush_uri is None:
        # A stroke without a brush reference cannot be attributed to any brush
        return
    # A stroke may reference a brush that was not preloaded; register it instead of raising a KeyError
    stats['brushes'].setdefault(brush_uri, {"strokes_count": 0})["strokes_count"] += 1
@staticmethod
def __extract_input_configuration__(model: InkModel, stroke: Stroke, stats):
    """
    Count the stroke for the environment, input devices and input providers it was captured with.

    The sensor data of the stroke leads to its input context, which references the environment and the
    sensor context. Every sensor channels context of the sensor context contributes one count to its input
    device and input provider. The sampling rate of the stroke is appended to the ``sampling_rates`` of each
    provider if it is non-zero. Failed lookups are logged and skipped.

    Parameters
    ----------
    model: InkModel
        The model providing sensor data and input configuration.
    stroke: Stroke
        The stroke to evaluate.
    stats
        Statistics dictionary; ``envs``, ``input_devices`` and ``input_providers`` are updated in place.
    """
    config = model.input_configuration
    try:
        sd: SensorData = model.sensor_data.sensor_data_by_id(stroke.sensor_data_id)
    except Exception as e:
        logger.error(f"Error while extracting input configuration: {e}")
        return
    ic: InputContext = config.get_input_context(sd.input_context_id)
    # A missing environment is logged like the other failed lookups instead of leaking StopIteration
    env: Optional[Environment] = next((item for item in config.environments if item.id == ic.environment_id), None)
    if env is None:
        logger.error(f"Error while extracting input configuration: environment {ic.environment_id} not found")
    else:
        stats['envs'][f'env-{env.id}']["strokes_count"] += 1
    sc: SensorContext = config.get_sensor_context(ic.sensor_context_id)
    # The sampling rate only depends on the stroke: compute it at most once, on first use
    sampling_rate = None
    for scc in sc.sensor_channels_contexts:
        try:
            input_device: InputDevice = next(dev for dev in config.devices if dev.id == scc.input_device_id)
            stats['input_devices'][f'dev-{input_device.id}']["strokes_count"] += 1
        except Exception as e:
            logger.error(f"Error while extracting input configuration: {e}")
        try:
            input_provider: InkInputProvider = next(
                prov for prov in config.ink_input_providers if prov.id == scc.input_provider_id)
            provider_stats = stats['input_providers'][f'prov-{input_provider.id}']
            provider_stats["strokes_count"] += 1
            if sampling_rate is None:
                sampling_rate = StatisticsAnalyzer.__detect_stroke_sampling_rate(stroke, model)
            if sampling_rate:
                provider_stats["sampling_rates"].append(sampling_rate)
        except Exception as e:
            logger.error(f"Error while extracting input configuration: {e}")
@staticmethod
def __detect_sampling_rate__(model: InkModel) -> float:
    """
    Average the per-stroke sampling values of all strokes of the model.

    Strokes for which the computation fails are logged and skipped; strokes with a value of zero are not
    taken into account.

    Parameters
    ----------
    model: InkModel
        The ink model to analyze

    Returns
    -------
    sampling_rate: float
        Mean of the per-stroke values rounded to two decimals, or ``0`` if no stroke contributed a value.
        NOTE(review): the original documentation states milliseconds; the unit depends on the timestamp
        channel - confirm.
    """
    collected: List[float] = []
    for current in model.strokes:
        try:
            value = StatisticsAnalyzer.__detect_stroke_sampling_rate(current, model)
            if value:
                collected.append(value)
        except Exception as e:
            logger.error(f"Error while detecting sampling rate: {e}")
    if not collected:
        return 0
    return round(statistics.mean(collected), 2)
@staticmethod
def __detect_stroke_sampling_rate(stroke: Stroke, model: InkModel) -> float:
    """
    Compute the mean difference between consecutive timestamps of one stroke.

    The stroke is requested as a flat array with the layout ``"xytp"``; every fourth value starting at the
    position of ``t`` is taken as a timestamp.

    Parameters
    ----------
    stroke: Stroke
        The stroke to analyze
    model: InkModel
        The ink model the stroke belongs to

    Returns
    -------
    sampling_rate: float
        Mean of the timestamp differences (each rounded to two decimals), or ``0.`` if the stroke has fewer
        than two timestamps. NOTE(review): the original documentation states milliseconds - confirm.
    """
    layout: str = "xytp"
    flat = as_strided_array(model, stroke, layout)
    timestamps = flat[layout.index("t")::len(layout)]
    if len(timestamps) < 2:
        return 0.
    # Pair every timestamp with its successor
    deltas: List[float] = [round(later - earlier, 2) for earlier, later in zip(timestamps, timestamps[1:])]
    return statistics.mean(deltas)
@staticmethod
def __compute_document_bounds__(model: InkModel) -> Dict[str, float]:
    """
    Determine the bounding box of the whole document.

    The bounds of the main ink tree are (re-)calculated via ``model.calculate_bounds_recursively`` and read
    from the group bounding box of its root node.

    Parameters
    ----------
    model: InkModel
        The ink model to analyze.

    Returns
    -------
    bounds: Dict[str, float]
        ``left``, ``right``, ``top``, ``bottom``, ``width`` and ``height`` rounded to two decimals; all
        values are ``0`` for a model without strokes.
    """
    if len(model.strokes) == 0:
        # Nothing to measure
        return dict.fromkeys(("left", "top", "right", "bottom", "width", "height"), 0)
    tree_root: StrokeGroupNode = model.ink_tree.root
    model.calculate_bounds_recursively(tree_root)
    box: BoundingBox = tree_root.group_bounding_box
    right_edge = box.x + box.width
    bottom_edge = box.y + box.height
    return {
        "left": round(box.x, 2),
        "right": round(right_edge, 2),
        "top": round(box.y, 2),
        "bottom": round(bottom_edge, 2),
        "width": round(box.width, 2),
        "height": round(box.height, 2),
    }
Classes
class StatisticsAnalyzer
-
Statistics analyzer
Analyze the model and compute statistics.
Expand source code
class StatisticsAnalyzer(ModelAnalyzer): """ Statistics analyzer =================== Analyze the model and compute statistics. """ @staticmethod def merge_stats(*stats): pass @staticmethod def summarize(stats, verbose=False): pass @staticmethod def analyze(model: InkModel, ignore_predicates: Optional[List[str]] = None, ignore_properties: Optional[List[str]] = None): """ Analyze the model and compute statistics. Parameters ---------- model: InkModel Ink model to analyze. ignore_predicates: Optional[List[str]] List of predicates to ignore. ignore_properties: Optional[List[str]] List of properties to ignore. """ # Init stats stats: Dict[str, Any] = { "envs": {}, "input_devices": {}, "input_providers": {}, "brushes": {}, "properties": StatisticsAnalyzer.__extract_properties_info(model, ignore_properties), "sampling_rate": StatisticsAnalyzer.__detect_sampling_rate__(model), "document_bounds": StatisticsAnalyzer.__compute_document_bounds__(model), "sensor_channels": {}, "knowledge_graph": {}, "points_count": { 'stroke_points': [], 'total': 0 }, "views": {}, "uim_version": f"{model.version.major}.{model.version.minor}.{model.version.patch}", "strokes_count": len(model.strokes) } # Preload stats items StatisticsAnalyzer.__preload_stats_items__(model, stats) # Extract stats for stroke in model.strokes: # Extract stroke info StatisticsAnalyzer.__extract_stroke_info__(stroke, stats) # Extract input configuration StatisticsAnalyzer.__extract_input_configuration__(model, stroke, stats) # Extract sensor data info StatisticsAnalyzer.__extract_sensor_data_info__(model, stroke, stats) # Extract brush info StatisticsAnalyzer.__extract_brushes_information(stroke, stats) # Post process stats StatisticsAnalyzer.__post_process_sensor_channels_info__(stats) # Extract views info StatisticsAnalyzer.__extract_views_info__(model, stats, ignore_predicates) # Extract knowledge graph info StatisticsAnalyzer.__extract_kg_info__(model, stats, ignore_predicates) # Post process stats 
StatisticsAnalyzer.__post_process_stats__(stats) return stats @staticmethod def __extract_stroke_info__(stroke: Stroke, stats: Dict[str, Any]): """ Extracts stroke information from the given stroke and updates the given stats dictionary. Parameters ---------- stroke: Stroke The stroke to extract information from stats: Dict[str, Any] The stats dictionary to update. """ stats['points_count']['stroke_points'].append(stroke.points_count) stats['points_count']['total'] += stroke.points_count @staticmethod def __extract_properties_info(model: InkModel, ignore_properties: Optional[List[str]] = None): """ Extracts properties information from the given model. Parameters ---------- model: InkModel The model to extract information from. ignore_properties: Optional[List[str]] A list of regular expressions to ignore properties that match them. """ props: Dict[str, Any] = dict(model.properties) result: Dict[str, Any] = {} for prop, value in props.items(): should_ignore: bool = False if ignore_properties: for ip in ignore_properties: if re.compile(ip).match(prop): should_ignore = True break if not should_ignore: if prop not in result: result[prop] = {'documents_count': 0, 'values': {}} if value not in result[prop]['values']: result[prop]['values'][value] = {"count": 0} result[prop]['values'][value]['count'] += 1 return result @staticmethod def __post_process_stats__(stats): strokes_count: int = stats['strokes_count'] for stat_type in ['brushes', 'envs', 'input_devices']: for k, v in stats[stat_type].items(): stats[stat_type][k]['percent'] = round(safe_zero_div(v['strokes_count'], strokes_count) * 100, 2) for k, v in stats['input_providers'].items(): stats['input_providers'][k]['percent'] = round(safe_zero_div(v['strokes_count'], strokes_count) * 100, 2) if 'sampling_rates' in v and len(v['sampling_rates']): stats['input_providers'][k]['sampling_rate'] = round(statistics.mean(v['sampling_rates']), 2) del stats['input_providers'][k]['sampling_rates'] else: 
stats['input_providers'][k]['sampling_rate'] = 0 for name, view in stats['views'].items(): for k, v in view['leaf_classes'].items(): stats['views'][name]['leaf_classes'][k]['percent'] = round( safe_zero_div(v['strokes_count'], strokes_count) * 100, 2) for k, v in stats['properties'].items(): for vk, vv in v['values'].items(): stats['properties'][k]['values'][vk]['percent'] = round( safe_zero_div(vv['count'], v['documents_count']) * 100, 2) for k, v in stats['sensor_channels'].items(): stats['sensor_channels'][k]['percent'] = round(safe_zero_div(v['strokes_count'], strokes_count) * 100, 2) # Stroke stats if len(stats['points_count']['stroke_points']) > 0: stats['points_count']['min'] = min(stats['points_count']['stroke_points']) stats['points_count']['max'] = max(stats['points_count']['stroke_points']) stats['points_count']['mean'] = round(statistics.mean(stats['points_count']['stroke_points']), 2) stats['points_count']['std'] = round(statistics.stdev(stats['points_count']['stroke_points']), 2) stats['points_count']['median'] = round(statistics.median(stats['points_count']['stroke_points']), 2) else: stats['points_count']['min'] = 0 stats['points_count']['max'] = 0 stats['points_count']['mean'] = 0 stats['points_count']['std'] = 0 stats['points_count']['median'] = 0 del stats['points_count']['stroke_points'] @staticmethod def __preload_stats_items__(model: InkModel, stats: Dict[str, Any]): for env in model.input_configuration.environments: env_props: Dict[str, Any] = dict(env.properties) stats['envs'][f'env-{env.id}'] = env_props stats['envs'][f'env-{env.id}']['strokes_count'] = 0 if 'user.agent' in env_props: rest = env_props['user.agent'] try: stats['envs'][f'env-{env.id}']['platform.name'] = rest['platform']['name'] stats['envs'][f'env-{env.id}']['platform.version'] = rest['platform']['version'] stats['envs'][f'env-{env.id}']['os.name'] = rest['os']['name'] except Exception as e: print(e) try: stats['envs'][f'env-{env.id}']['browser.name'] = 
rest['browser']['name'] stats['envs'][f'env-{env.id}']['browser.version'] = rest['browser']['version'] except Exception as _: stats['envs'][f'env-{env.id}']['browser.name'] = 'unknown' stats['envs'][f'env-{env.id}']['browser.version'] = 'unknown' for dev in model.input_configuration.devices: stats['input_devices'][f'dev-{dev.id}'] = {"strokes_count": 0} for ip in model.input_configuration.ink_input_providers: stats['input_providers'][f'prov-{ip.id}'] = {"strokes_count": 0, "sampling_rates": []} for brush in model.brushes.vector_brushes: stats['brushes'][brush.name] = {"strokes_count": 0} for brush in model.brushes.raster_brushes: stats['brushes'][brush.name] = {"strokes_count": 0} area = stats['document_bounds']['width'] * stats['document_bounds']['height'] stats['document_stats'] = {"min_area": area, "max_area": area} @staticmethod def __extract_views_info__(model: InkModel, stats: Dict[str, Any], ignore_predicates: Optional[List[str]] = None): kg: TripleStore = model.knowledge_graph for v in model.views: v: InkTree = v view_info: Dict[str, Any] = { "assumed_type_predicate": ModelAnalyzer.__assume_view_type_predicate__(model, v), "statements_count": 0, "predicates": {}, "leaf_classes": {} } if view_info["assumed_type_predicate"] != "unknown": enumerator: PreOrderEnumerator = PreOrderEnumerator(v.root) for node in enumerator: # Calculate predicates per view sts = kg.all_statements_for(subject=node.uri) for statement in sts: should_ignore = False if ignore_predicates: for ip in ignore_predicates: if re.compile(ip).match(statement.predicate): should_ignore = True break if not should_ignore: if statement.predicate not in view_info["predicates"]: view_info["predicates"][statement.predicate] = {"occurrence": 0} view_info["predicates"][statement.predicate]["occurrence"] += 1 if type(node) == StrokeGroupNode: children_types = [type(n) for n in node.children] if StrokeGroupNode in children_types: continue sts = kg.all_statements_for(subject=node.uri, 
predicate=view_info["assumed_type_predicate"]) if len(sts) > 0: sem_type = sts[0].object if sem_type not in view_info["leaf_classes"]: view_info["leaf_classes"][sem_type] = {"strokes_count": 0, 'percent': 0} view_info["leaf_classes"][sem_type]["strokes_count"] += len(node.children) stats["views"][v.name] = view_info @staticmethod def __extract_kg_info__(model: InkModel, stats, ignore_predicates=None): kg: TripleStore = model.knowledge_graph stats["knowledge_graph"]["statements_count"] = len(kg.statements) stats["knowledge_graph"]["predicates"] = {} for statement in kg.statements: should_ignore = False if ignore_predicates: for ip in ignore_predicates: if re.compile(ip).match(statement.predicate): should_ignore = True break if not should_ignore: if statement.predicate not in stats["knowledge_graph"]["predicates"]: stats["knowledge_graph"]["predicates"][statement.predicate] = {"occurrence": 0} stats["knowledge_graph"]["predicates"][statement.predicate]["occurrence"] += 1 @staticmethod def __extract_brushes_information(stroke: Stroke, stats): style: Style = stroke.style stats['brushes'][style.brush_uri]["strokes_count"] += 1 @staticmethod def __extract_input_configuration__(model: InkModel, stroke: Stroke, stats): try: sd: SensorData = model.sensor_data.sensor_data_by_id(stroke.sensor_data_id) except Exception as e: logger.error(f"Error while extracting input configuration: {e}") return ic: InputContext = model.input_configuration.get_input_context(sd.input_context_id) env: Environment = next(env for env in model.input_configuration.environments if env.id == ic.environment_id) stats['envs'][f'env-{env.id}']["strokes_count"] += 1 sc: SensorContext = model.input_configuration.get_sensor_context(ic.sensor_context_id) for scc in sc.sensor_channels_contexts: try: input_device: InputDevice = next( dev for dev in model.input_configuration.devices if dev.id == scc.input_device_id) stats['input_devices'][f'dev-{input_device.id}']["strokes_count"] += 1 except Exception as e: 
logger.error(f"Error while extracting input configuration: {e}") try: input_provider: InkInputProvider = next( prov for prov in model.input_configuration.ink_input_providers if prov.id == scc.input_provider_id) stats['input_providers'][f'prov-{input_provider.id}']["strokes_count"] += 1 sr = StatisticsAnalyzer.__detect_stroke_sampling_rate(stroke, model) if sr: stats['input_providers'][f'prov-{input_provider.id}']["sampling_rates"].append(sr) except Exception as e: logger.error(f"Error while extracting input configuration: {e}") @staticmethod def __detect_sampling_rate__(model: InkModel) -> float: """ Calculates the average sampling rate of the strokes in the ink model. Parameters ---------- model: InkModel The ink model to analyze Returns ------- sampling_rate: float The average sampling rate of the strokes in the ink model in milliseconds. """ per_stroke_sampling: List[float] = [] for stroke in model.strokes: try: sr = StatisticsAnalyzer.__detect_stroke_sampling_rate(stroke, model) if sr: per_stroke_sampling.append(sr) except Exception as e: logger.error(f"Error while detecting sampling rate: {e}") if len(per_stroke_sampling) == 0: return 0 return round(statistics.mean(per_stroke_sampling), 2) @staticmethod def __detect_stroke_sampling_rate(stroke: Stroke, model: InkModel) -> float: """ Calculates the sampling rate of a stroke in the ink model. Parameters ---------- stroke: Stroke The stroke to analyze model: InkModel The ink model to analyze Returns ------- sampling_rate: float The sampling rate of the stroke in milliseconds. """ layout: str = "xytp" pos_t: int = layout.index("t") stride: int = len(layout) stride_stroke = as_strided_array(model, stroke, layout) ts = stride_stroke[pos_t::stride] if len(ts) < 2: return 0. diffs: List[float] = [round(ts[j] - ts[j - 1], 2) for j in range(1, len(ts))] return statistics.mean(diffs) @staticmethod def __compute_document_bounds__(model: InkModel) -> Dict[str, float]: """ Computes the bounding box of the document. 
Parameters ---------- model: InkModel The ink model to analyze. Returns ------- bounds: Dict[str, float] The bounding box of the document. """ if len(model.strokes) == 0: return {"left": 0, "top": 0, "right": 0, "bottom": 0, "width": 0, "height": 0} root: StrokeGroupNode = model.ink_tree.root model.calculate_bounds_recursively(root) bounds: BoundingBox = root.group_bounding_box return { "left": round(bounds.x, 2), "right": round(bounds.x + bounds.width, 2), "top": round(bounds.y, 2), "bottom": round(bounds.y + bounds.height, 2), "width": round(bounds.width, 2), "height": round(bounds.height, 2) }
Ancestors
- ModelAnalyzer
- abc.ABC
Static methods
def analyze(model: InkModel, ignore_predicates: Optional[List[str]] = None, ignore_properties: Optional[List[str]] = None)
-
Analyze the model and compute statistics. Parameters
model
:InkModel
- Ink model to analyze.
ignore_predicates
:Optional[List[str]]
- List of predicates to ignore.
ignore_properties
:Optional[List[str]]
- List of properties to ignore.
Expand source code
@staticmethod def analyze(model: InkModel, ignore_predicates: Optional[List[str]] = None, ignore_properties: Optional[List[str]] = None): """ Analyze the model and compute statistics. Parameters ---------- model: InkModel Ink model to analyze. ignore_predicates: Optional[List[str]] List of predicates to ignore. ignore_properties: Optional[List[str]] List of properties to ignore. """ # Init stats stats: Dict[str, Any] = { "envs": {}, "input_devices": {}, "input_providers": {}, "brushes": {}, "properties": StatisticsAnalyzer.__extract_properties_info(model, ignore_properties), "sampling_rate": StatisticsAnalyzer.__detect_sampling_rate__(model), "document_bounds": StatisticsAnalyzer.__compute_document_bounds__(model), "sensor_channels": {}, "knowledge_graph": {}, "points_count": { 'stroke_points': [], 'total': 0 }, "views": {}, "uim_version": f"{model.version.major}.{model.version.minor}.{model.version.patch}", "strokes_count": len(model.strokes) } # Preload stats items StatisticsAnalyzer.__preload_stats_items__(model, stats) # Extract stats for stroke in model.strokes: # Extract stroke info StatisticsAnalyzer.__extract_stroke_info__(stroke, stats) # Extract input configuration StatisticsAnalyzer.__extract_input_configuration__(model, stroke, stats) # Extract sensor data info StatisticsAnalyzer.__extract_sensor_data_info__(model, stroke, stats) # Extract brush info StatisticsAnalyzer.__extract_brushes_information(stroke, stats) # Post process stats StatisticsAnalyzer.__post_process_sensor_channels_info__(stats) # Extract views info StatisticsAnalyzer.__extract_views_info__(model, stats, ignore_predicates) # Extract knowledge graph info StatisticsAnalyzer.__extract_kg_info__(model, stats, ignore_predicates) # Post process stats StatisticsAnalyzer.__post_process_stats__(stats) return stats
def merge_stats(*stats)
-
Expand source code
@staticmethod def merge_stats(*stats): pass
def summarize(stats, verbose=False)
-
Expand source code
@staticmethod def summarize(stats, verbose=False): pass
Inherited members