Coverage for src / tracekit / reporting / chart_selection.py: 99%
54 statements
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-11 23:04 +0000
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-11 23:04 +0000
"""Automated chart type selection for TraceKit reports.

This module provides intelligent chart type selection based on data
characteristics to optimize data visualization in reports.

Example:
    >>> from tracekit.reporting import auto_select_chart
    >>> chart_type = auto_select_chart("time_series", (1000, 2))
    >>> print(chart_type)  # "line"
"""
13from __future__ import annotations
15from typing import TYPE_CHECKING, Literal
17import numpy as np
19if TYPE_CHECKING:
20 from numpy.typing import NDArray
# Closed set of chart kinds that the selection helpers in this module return.
ChartType = Literal[
    "line",
    "scatter",
    "bar",
    "histogram",
    "heatmap",
    "pie",
    "spectrum",
]
def auto_select_chart(
    data_type: str,
    data_shape: tuple[int, ...],
    *,
    data: NDArray[np.float64] | None = None,
) -> ChartType:
    """Automatically select appropriate chart type based on data characteristics.

    Args:
        data_type: Type of data - one of:
            - "time_series": Time-domain waveform data
            - "frequency": Frequency-domain spectral data
            - "distribution": Statistical distribution data
            - "comparison": Comparative measurements
            - "correlation": Correlation or scatter data
            - "categorical": Categorical comparison data
            - "matrix": 2D matrix data
            - "parts": Part-to-whole relationships
            Any other value falls back to a choice based on ``data_shape``.
        data_shape: Shape of the data array (rows, [columns]).
        data: Optional actual data values. Only consulted for
            "categorical" data, to decide between a pie and a bar chart.

    Returns:
        Recommended chart type: 'line', 'scatter', 'bar', 'histogram',
        'heatmap', 'pie', or 'spectrum'.

    Example:
        >>> # Time series data -> line plot
        >>> auto_select_chart("time_series", (1000, 2))
        'line'

        >>> # Distribution data -> histogram
        >>> auto_select_chart("distribution", (500,))
        'histogram'

        >>> # Categorical comparison -> bar chart
        >>> auto_select_chart("categorical", (5,))
        'bar'

        >>> # 2D matrix -> heatmap
        >>> auto_select_chart("matrix", (100, 100))
        'heatmap'

    References:
        REPORT-028: Automated Chart Type Selection
    """
    # Time series -> line plot
    if data_type == "time_series":
        return "line"

    # Frequency data -> spectrum plot (log scale)
    if data_type == "frequency":
        return "spectrum"

    # Distribution -> histogram
    if data_type == "distribution":
        return "histogram"

    # Categorical comparison -> bar chart, or pie for a handful of categories
    if data_type == "categorical":
        few_categories = len(data_shape) > 0 and data_shape[0] <= 6
        if few_categories and data is not None:
            values = np.asarray(data)
            # A pie needs non-negative, finite slices and a positive total.
            # (The previous check compared ``data / total * 100`` with itself,
            # which only ever failed for non-finite input; the finiteness
            # requirement is now spelled out directly.)
            if (
                np.all(values >= 0)
                and np.sum(values) > 0
                and np.all(np.isfinite(values))
            ):
                return "pie"
        return "bar"

    # Comparison (continuous) -> scatter plot
    if data_type == "comparison":
        # Multi-dimensional data of moderate size -> scatter plot
        if len(data_shape) >= 2 and data_shape[0] < 10000:
            return "scatter"
        # 1D or very large comparison data -> bar chart
        return "bar"

    # Correlation -> scatter plot
    if data_type == "correlation":
        return "scatter"

    # 2D matrix -> heatmap
    if data_type == "matrix":
        return "heatmap"

    # Parts-to-whole -> pie chart
    if data_type == "parts":
        return "pie"

    # Unrecognized data type: decide from the shape alone.
    if len(data_shape) == 1:
        # 1D data: bar for small sets, histogram otherwise
        if data_shape[0] < 20:
            return "bar"
        return "histogram"
    if len(data_shape) == 2:
        # 2D data: heatmap when both dimensions are large, else scatter
        if data_shape[0] > 50 and data_shape[1] > 50:
            return "heatmap"
        return "scatter"

    # Fallback to line plot (0-D or 3+ dimensional shapes)
    return "line"
def recommend_chart_with_reasoning(
    data_type: str,
    data_shape: tuple[int, ...],
    *,
    data: NDArray[np.float64] | None = None,
) -> dict[str, str | ChartType]:
    """Pick a chart type and attach a one-sentence justification.

    The chart type is obtained from ``auto_select_chart`` using the same
    arguments; the justification is looked up from a fixed table keyed by
    that chart type.

    Args:
        data_type: Type of data (see auto_select_chart).
        data_shape: Shape of the data array.
        data: Optional actual data array.

    Returns:
        Dictionary with 'chart_type' and 'reasoning' keys.

    Example:
        >>> result = recommend_chart_with_reasoning("time_series", (1000, 2))
        >>> print(result['chart_type'])  # "line"
        >>> print(result['reasoning'])
        Time series or sequential data best visualized with line plot

    References:
        REPORT-028: Automated Chart Type Selection
    """
    selected = auto_select_chart(data_type, data_shape, data=data)

    # One canned explanation per chart type.
    explanations = {
        "line": "Time series or sequential data best visualized with line plot",
        "scatter": "Point data or correlation best shown with scatter plot",
        "bar": "Categorical or discrete comparison best shown with bar chart",
        "histogram": "Distribution data best represented as histogram",
        "heatmap": "2D matrix data best visualized as heatmap",
        "pie": "Part-to-whole relationship best shown with pie chart",
        "spectrum": "Frequency domain data best shown with log-scale spectrum plot",
    }

    if selected in explanations:
        why = explanations[selected]
    else:
        # Generic wording for any chart type missing from the table.
        why = f"Data characteristics suggest {selected} visualization"

    return {"chart_type": selected, "reasoning": why}
def get_axis_scaling(
    data_type: str,
    data: NDArray[np.float64] | None = None,
) -> dict[str, str]:
    """Recommend axis scaling (linear vs log) based on data type.

    Both axes default to linear. "frequency" data gets log scaling on both
    axes. Independently of the data type, if ``data`` is supplied and the
    ratio of its maximum to its smallest strictly positive value exceeds
    1000 (more than three orders of magnitude), the y axis is set to log.

    Args:
        data_type: Type of data.
        data: Optional actual data values for range analysis. May have any
            number of dimensions; empty input and input without positive
            values leave the scaling unchanged.

    Returns:
        Dictionary with 'x_scale' and 'y_scale' keys ('linear' or 'log').

    Example:
        >>> get_axis_scaling("frequency")
        {'x_scale': 'log', 'y_scale': 'log'}

    References:
        REPORT-028: Automated Chart Type Selection
    """
    # Frequency data: both axes log; everything else starts linear.
    if data_type == "frequency":
        x_scale = "log"
        y_scale = "log"
    else:
        x_scale = "linear"
        y_scale = "linear"

    if data is not None:
        values = np.asarray(data)
        # Only strictly positive values can anchor a log axis. Testing the
        # size of this subset (rather than len(data)) also handles empty
        # multi-dimensional arrays and 0-d arrays without raising.
        positive = values[values > 0]
        if positive.size > 0:
            smallest_positive = np.min(positive)
            largest = np.max(values)
            # Span of more than 3 orders of magnitude -> log scale.
            if largest / smallest_positive > 1000:
                y_scale = "log"

    return {
        "x_scale": x_scale,
        "y_scale": y_scale,
    }
# Names exported by ``from <this module> import *``.
__all__ = [
    "ChartType",
    "auto_select_chart",
    "get_axis_scaling",
    "recommend_chart_with_reasoning",
]